OSDN Git Service

Merge tag 'perf-urgent-2023-09-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author: Linus Torvalds <torvalds@linux-foundation.org>
Sun, 10 Sep 2023 17:34:46 +0000 (10:34 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sun, 10 Sep 2023 17:34:46 +0000 (10:34 -0700)
Pull x86 perf event fix from Ingo Molnar:
 "Work around a firmware bug in the uncore PMU driver, affecting certain
  Intel systems"

* tag 'perf-urgent-2023-09-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/uncore: Correct the number of CHAs on EMR

2002 files changed:
.gitignore
Documentation/ABI/testing/sysfs-class-led
Documentation/admin-guide/perf/alibaba_pmu.rst
Documentation/admin-guide/sysctl/kernel.rst
Documentation/bpf/btf.rst
Documentation/bpf/index.rst
Documentation/bpf/linux-notes.rst [moved from Documentation/bpf/standardization/linux-notes.rst with 100% similarity]
Documentation/bpf/llvm_reloc.rst
Documentation/bpf/standardization/abi.rst [new file with mode: 0644]
Documentation/bpf/standardization/index.rst
Documentation/bpf/standardization/instruction-set.rst
Documentation/core-api/printk-formats.rst
Documentation/dev-tools/kasan.rst
Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/cdns,i2c-r1p10.yaml
Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt [deleted file]
Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/i2c-arb.txt [deleted file]
Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml
Documentation/devicetree/bindings/i2c/nxp,pca9541.txt [deleted file]
Documentation/devicetree/bindings/i2c/nxp,pca9541.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/qcom,i2c-cci.yaml
Documentation/devicetree/bindings/i3c/i3c.yaml
Documentation/devicetree/bindings/input/azoteq,iqs7222.yaml
Documentation/devicetree/bindings/input/stmpe-keypad.txt [deleted file]
Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs7211.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml
Documentation/devicetree/bindings/input/touchscreen/eeti,exc3000.yaml
Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
Documentation/devicetree/bindings/input/touchscreen/stmpe.txt [deleted file]
Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt [deleted file]
Documentation/devicetree/bindings/interrupt-controller/st,stih407-irq-syscfg.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/leds/common.yaml
Documentation/devicetree/bindings/leds/leds-an30259a.txt [deleted file]
Documentation/devicetree/bindings/leds/leds-aw2013.yaml
Documentation/devicetree/bindings/leds/leds-group-multicolor.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/leds/nxp,pca953x.yaml
Documentation/devicetree/bindings/leds/nxp,pca995x.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/leds/panasonic,an30259a.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/leds/rohm,bd2606mvv.yaml
Documentation/devicetree/bindings/leds/rohm,bd71828-leds.yaml
Documentation/devicetree/bindings/media/i2c/ov5695.txt [deleted file]
Documentation/devicetree/bindings/media/i2c/ov7251.txt [deleted file]
Documentation/devicetree/bindings/media/i2c/ovti,ov5693.yaml
Documentation/devicetree/bindings/media/i2c/ovti,ov7251.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/media/rockchip-isp1.yaml
Documentation/devicetree/bindings/mfd/allwinner,sun6i-a31-prcm.yaml
Documentation/devicetree/bindings/mfd/allwinner,sun8i-a23-prcm.yaml
Documentation/devicetree/bindings/mfd/atmel-flexcom.txt
Documentation/devicetree/bindings/mfd/atmel-gpbr.txt
Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt
Documentation/devicetree/bindings/mfd/atmel-matrix.txt
Documentation/devicetree/bindings/mfd/atmel-smc.txt
Documentation/devicetree/bindings/mfd/brcm,bcm6318-gpio-sysctl.yaml
Documentation/devicetree/bindings/mfd/brcm,bcm63268-gpio-sysctl.yaml
Documentation/devicetree/bindings/mfd/brcm,bcm6328-gpio-sysctl.yaml
Documentation/devicetree/bindings/mfd/brcm,bcm6358-gpio-sysctl.yaml
Documentation/devicetree/bindings/mfd/brcm,bcm6362-gpio-sysctl.yaml
Documentation/devicetree/bindings/mfd/brcm,bcm6368-gpio-sysctl.yaml
Documentation/devicetree/bindings/mfd/maxim,max77693.yaml
Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.yaml
Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml
Documentation/devicetree/bindings/mfd/st,stmpe.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mfd/st,stpmic1.yaml
Documentation/devicetree/bindings/mfd/stericsson,db8500-prcmu.yaml
Documentation/devicetree/bindings/mfd/stmpe.txt [deleted file]
Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt [deleted file]
Documentation/devicetree/bindings/pwm/brcm,kona-pwm.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml
Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
Documentation/devicetree/bindings/remoteproc/qcom,msm8996-mss-pil.yaml
Documentation/devicetree/bindings/remoteproc/qcom,pas-common.yaml
Documentation/devicetree/bindings/remoteproc/qcom,qcs404-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sc7180-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sc8180x-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sc8280xp-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sdx55-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sm6115-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sm6350-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sm8150-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sm8350-pas.yaml
Documentation/devicetree/bindings/remoteproc/qcom,sm8550-pas.yaml
Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.yaml
Documentation/devicetree/bindings/rtc/intersil,isl12022.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/maxim,ds3231.txt [deleted file]
Documentation/devicetree/bindings/rtc/nxp,pcf2127.yaml
Documentation/devicetree/bindings/rtc/st,m48t86.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
Documentation/devicetree/bindings/sound/fsl,easrc.yaml
Documentation/devicetree/bindings/thermal/loongson,ls2k-thermal.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml
Documentation/devicetree/bindings/watchdog/marvell,cn10624-wdt.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
Documentation/devicetree/bindings/watchdog/ti,rti-wdt.yaml
Documentation/driver-api/libata.rst
Documentation/features/debug/KASAN/arch-support.txt
Documentation/features/debug/kcov/arch-support.txt
Documentation/features/debug/kgdb/arch-support.txt
Documentation/filesystems/ceph.rst
Documentation/filesystems/gfs2-glocks.rst
Documentation/filesystems/proc.rst
Documentation/gpu/amdgpu/driver-misc.rst
Documentation/kbuild/kconfig.rst
Documentation/kbuild/llvm.rst
Documentation/process/maintainer-netdev.rst
Documentation/riscv/hwprobe.rst
Documentation/translations/zh_CN/dev-tools/kasan.rst
Documentation/userspace-api/netlink/intro.rst
Documentation/virt/kvm/api.rst
MAINTAINERS
Makefile
arch/alpha/include/asm/Kbuild
arch/alpha/lib/callback_srm.S
arch/alpha/lib/clear_page.S
arch/alpha/lib/clear_user.S
arch/alpha/lib/copy_page.S
arch/alpha/lib/copy_user.S
arch/alpha/lib/csum_ipv6_magic.S
arch/alpha/lib/divide.S
arch/alpha/lib/ev6-clear_page.S
arch/alpha/lib/ev6-clear_user.S
arch/alpha/lib/ev6-copy_page.S
arch/alpha/lib/ev6-copy_user.S
arch/alpha/lib/ev6-csum_ipv6_magic.S
arch/alpha/lib/ev6-divide.S
arch/alpha/lib/ev6-memchr.S
arch/alpha/lib/ev6-memcpy.S
arch/alpha/lib/ev6-memset.S
arch/alpha/lib/ev67-strcat.S
arch/alpha/lib/ev67-strchr.S
arch/alpha/lib/ev67-strlen.S
arch/alpha/lib/ev67-strncat.S
arch/alpha/lib/ev67-strrchr.S
arch/alpha/lib/memchr.S
arch/alpha/lib/memmove.S
arch/alpha/lib/memset.S
arch/alpha/lib/strcat.S
arch/alpha/lib/strchr.S
arch/alpha/lib/strcpy.S
arch/alpha/lib/strlen.S
arch/alpha/lib/strncat.S
arch/alpha/lib/strncpy.S
arch/alpha/lib/strrchr.S
arch/alpha/lib/udiv-qrnnd.S
arch/arc/Kconfig
arch/arc/Makefile
arch/arc/include/asm/arcregs.h
arch/arc/include/asm/atomic-llsc.h
arch/arc/include/asm/atomic64-arcv2.h
arch/arc/include/asm/current.h
arch/arc/include/asm/dwarf.h
arch/arc/include/asm/entry-arcv2.h
arch/arc/include/asm/entry-compact.h
arch/arc/include/asm/entry.h
arch/arc/include/asm/irq.h
arch/arc/include/asm/mmu.h
arch/arc/include/asm/processor.h
arch/arc/include/asm/ptrace.h
arch/arc/include/asm/setup.h
arch/arc/include/asm/smp.h
arch/arc/include/asm/thread_info.h
arch/arc/include/asm/uaccess.h
arch/arc/kernel/Makefile
arch/arc/kernel/asm-offsets.c
arch/arc/kernel/ctx_sw.c [deleted file]
arch/arc/kernel/ctx_sw_asm.S
arch/arc/kernel/devtree.c
arch/arc/kernel/entry-arcv2.S
arch/arc/kernel/entry-compact.S
arch/arc/kernel/entry.S
arch/arc/kernel/intc-arcv2.c
arch/arc/kernel/kgdb.c
arch/arc/kernel/mcip.c
arch/arc/kernel/process.c
arch/arc/kernel/ptrace.c
arch/arc/kernel/setup.c
arch/arc/kernel/signal.c
arch/arc/kernel/smp.c
arch/arc/kernel/stacktrace.c
arch/arc/kernel/traps.c
arch/arc/kernel/troubleshoot.c
arch/arc/lib/memset-archs.S
arch/arc/mm/cache.c
arch/arc/mm/extable.c
arch/arc/mm/fault.c
arch/arc/mm/init.c
arch/arc/mm/tlb.c
arch/arc/plat-axs10x/axs10x.c
arch/arm/configs/dram_0x00000000.config
arch/arm/configs/dram_0xc0000000.config
arch/arm/configs/dram_0xd0000000.config
arch/arm/configs/lpae.config
arch/arm/include/asm/arm_pmuv3.h
arch/arm/include/asm/ide.h [deleted file]
arch/arm64/configs/virt.config
arch/arm64/include/asm/efi.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/idreg-override.c
arch/arm64/kvm/Kconfig
arch/arm64/kvm/arm.c
arch/arm64/kvm/emulate-nested.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/include/nvhe/mm.h
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/mm.c
arch/arm64/kvm/hyp/nvhe/setup.c
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vhe/tlb.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/nested.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/pmu.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/trace_arm.h
arch/arm64/kvm/vgic/vgic.h
arch/arm64/lib/csum.c
arch/arm64/tools/cpucaps
arch/arm64/tools/sysreg
arch/ia64/include/asm/Kbuild
arch/ia64/kernel/entry.S
arch/ia64/kernel/esi_stub.S
arch/ia64/kernel/head.S
arch/ia64/kernel/ivt.S
arch/ia64/kernel/pal.S
arch/ia64/lib/clear_page.S
arch/ia64/lib/clear_user.S
arch/ia64/lib/copy_page.S
arch/ia64/lib/copy_page_mck.S
arch/ia64/lib/copy_user.S
arch/ia64/lib/flush.S
arch/ia64/lib/idiv32.S
arch/ia64/lib/idiv64.S
arch/ia64/lib/ip_fast_csum.S
arch/ia64/lib/memcpy.S
arch/ia64/lib/memcpy_mck.S
arch/ia64/lib/memset.S
arch/ia64/lib/strlen.S
arch/ia64/lib/strncpy_from_user.S
arch/ia64/lib/strnlen_user.S
arch/ia64/lib/xor.S
arch/loongarch/Kconfig
arch/loongarch/Makefile
arch/loongarch/configs/loongson3_defconfig
arch/loongarch/include/asm/asm-prototypes.h
arch/loongarch/include/asm/asmmacro.h
arch/loongarch/include/asm/kasan.h [new file with mode: 0644]
arch/loongarch/include/asm/kfence.h [new file with mode: 0644]
arch/loongarch/include/asm/kgdb.h [new file with mode: 0644]
arch/loongarch/include/asm/lbt.h [new file with mode: 0644]
arch/loongarch/include/asm/loongarch.h
arch/loongarch/include/asm/mmzone.h
arch/loongarch/include/asm/page.h
arch/loongarch/include/asm/pgalloc.h
arch/loongarch/include/asm/pgtable.h
arch/loongarch/include/asm/processor.h
arch/loongarch/include/asm/setup.h
arch/loongarch/include/asm/stackframe.h
arch/loongarch/include/asm/string.h
arch/loongarch/include/asm/switch_to.h
arch/loongarch/include/asm/thread_info.h
arch/loongarch/include/asm/xor.h [new file with mode: 0644]
arch/loongarch/include/asm/xor_simd.h [new file with mode: 0644]
arch/loongarch/include/uapi/asm/ptrace.h
arch/loongarch/include/uapi/asm/sigcontext.h
arch/loongarch/kernel/Makefile
arch/loongarch/kernel/asm-offsets.c
arch/loongarch/kernel/cpu-probe.c
arch/loongarch/kernel/entry.S
arch/loongarch/kernel/fpu.S
arch/loongarch/kernel/head.S
arch/loongarch/kernel/kfpu.c
arch/loongarch/kernel/kgdb.c [new file with mode: 0644]
arch/loongarch/kernel/lbt.S [new file with mode: 0644]
arch/loongarch/kernel/numa.c
arch/loongarch/kernel/process.c
arch/loongarch/kernel/ptrace.c
arch/loongarch/kernel/relocate.c
arch/loongarch/kernel/setup.c
arch/loongarch/kernel/signal.c
arch/loongarch/kernel/stacktrace.c
arch/loongarch/kernel/traps.c
arch/loongarch/lib/Makefile
arch/loongarch/lib/clear_user.S
arch/loongarch/lib/copy_user.S
arch/loongarch/lib/memcpy.S
arch/loongarch/lib/memmove.S
arch/loongarch/lib/memset.S
arch/loongarch/lib/xor_simd.c [new file with mode: 0644]
arch/loongarch/lib/xor_simd.h [new file with mode: 0644]
arch/loongarch/lib/xor_simd_glue.c [new file with mode: 0644]
arch/loongarch/lib/xor_template.c [new file with mode: 0644]
arch/loongarch/mm/Makefile
arch/loongarch/mm/cache.c
arch/loongarch/mm/fault.c
arch/loongarch/mm/init.c
arch/loongarch/mm/kasan_init.c [new file with mode: 0644]
arch/loongarch/mm/mmap.c
arch/loongarch/mm/pgtable.c
arch/loongarch/vdso/Makefile
arch/m68k/include/asm/ide.h [deleted file]
arch/microblaze/include/asm/page.h
arch/microblaze/include/asm/setup.h
arch/microblaze/kernel/reset.c
arch/microblaze/mm/init.c
arch/mips/Makefile
arch/mips/bmips/setup.c
arch/mips/cavium-octeon/flash_setup.c
arch/mips/cavium-octeon/octeon-memcpy.S
arch/mips/cavium-octeon/octeon-platform.c
arch/mips/configs/ip22_defconfig
arch/mips/configs/loongson3_defconfig
arch/mips/configs/malta_defconfig
arch/mips/configs/malta_kvm_defconfig
arch/mips/configs/maltaup_xpa_defconfig
arch/mips/configs/rm200_defconfig
arch/mips/include/asm/Kbuild
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/mach-loongson32/loongson1.h
arch/mips/include/asm/mach-loongson32/regs-clk.h [deleted file]
arch/mips/include/asm/mach-loongson32/regs-rtc.h [deleted file]
arch/mips/kernel/mcount.S
arch/mips/kernel/octeon_switch.S
arch/mips/kernel/r2300_fpu.S
arch/mips/kernel/r2300_switch.S
arch/mips/kernel/r4k_fpu.S
arch/mips/kvm/mips.c
arch/mips/kvm/mmu.c
arch/mips/lantiq/irq.c
arch/mips/lantiq/xway/dcdc.c
arch/mips/lantiq/xway/gptu.c
arch/mips/lantiq/xway/sysctrl.c
arch/mips/lantiq/xway/vmmc.c
arch/mips/lib/csum_partial.S
arch/mips/lib/memcpy.S
arch/mips/lib/memset.S
arch/mips/lib/strncpy_user.S
arch/mips/lib/strnlen_user.S
arch/mips/loongson32/common/platform.c
arch/mips/loongson64/smp.c
arch/mips/mm/page-funcs.S
arch/mips/mm/tlb-funcs.S
arch/mips/pci/pci-lantiq.c
arch/mips/pci/pci-rt2880.c
arch/mips/pic32/pic32mzda/config.c
arch/mips/ralink/ill_acc.c
arch/mips/ralink/irq.c
arch/mips/ralink/of.c
arch/mips/ralink/prom.c
arch/mips/txx9/generic/pci.c
arch/mips/vdso/vdso.lds.S
arch/openrisc/include/asm/bug.h [new file with mode: 0644]
arch/openrisc/include/asm/page.h
arch/openrisc/include/asm/processor.h
arch/openrisc/kernel/process.c
arch/openrisc/kernel/ptrace.c
arch/openrisc/kernel/signal.c
arch/openrisc/kernel/smp.c
arch/openrisc/kernel/time.c
arch/openrisc/kernel/traps.c
arch/openrisc/mm/fault.c
arch/openrisc/mm/init.c
arch/openrisc/mm/ioremap.c
arch/openrisc/mm/tlb.c
arch/parisc/include/asm/ide.h [deleted file]
arch/powerpc/configs/disable-werror.config
arch/powerpc/configs/security.config
arch/powerpc/include/asm/ide.h [deleted file]
arch/riscv/Kconfig
arch/riscv/Kconfig.errata
arch/riscv/configs/32-bit.config
arch/riscv/configs/64-bit.config
arch/riscv/errata/Makefile
arch/riscv/errata/andes/Makefile [new file with mode: 0644]
arch/riscv/errata/andes/errata.c [new file with mode: 0644]
arch/riscv/errata/thead/errata.c
arch/riscv/include/asm/alternative.h
arch/riscv/include/asm/cpufeature.h
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/dma-noncoherent.h [new file with mode: 0644]
arch/riscv/include/asm/efi.h
arch/riscv/include/asm/errata_list.h
arch/riscv/include/asm/kvm_host.h
arch/riscv/include/asm/kvm_vcpu_vector.h
arch/riscv/include/asm/page.h
arch/riscv/include/asm/patch.h
arch/riscv/include/asm/vendorid_list.h
arch/riscv/include/uapi/asm/kvm.h
arch/riscv/include/uapi/asm/ptrace.h
arch/riscv/kernel/Makefile
arch/riscv/kernel/alternative.c
arch/riscv/kernel/copy-unaligned.S [new file with mode: 0644]
arch/riscv/kernel/copy-unaligned.h [new file with mode: 0644]
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/image-vars.h
arch/riscv/kernel/patch.c
arch/riscv/kernel/pi/Makefile
arch/riscv/kernel/pi/cmdline_early.c
arch/riscv/kernel/pi/fdt_early.c [new file with mode: 0644]
arch/riscv/kernel/ptrace.c
arch/riscv/kernel/setup.c
arch/riscv/kernel/smpboot.c
arch/riscv/kvm/Makefile
arch/riscv/kvm/aia.c
arch/riscv/kvm/mmu.c
arch/riscv/kvm/vcpu.c
arch/riscv/kvm/vcpu_fp.c
arch/riscv/kvm/vcpu_onereg.c [new file with mode: 0644]
arch/riscv/kvm/vcpu_sbi.c
arch/riscv/kvm/vcpu_timer.c
arch/riscv/kvm/vcpu_vector.c
arch/riscv/mm/dma-noncoherent.c
arch/riscv/mm/init.c
arch/riscv/mm/pmem.c
arch/riscv/net/bpf_jit.h
arch/riscv/net/bpf_jit_comp64.c
arch/riscv/net/bpf_jit_core.c
arch/s390/boot/ipl_parm.c
arch/s390/boot/startup.c
arch/s390/boot/vmem.c
arch/s390/configs/btf.config
arch/s390/configs/kasan.config
arch/s390/include/asm/airq.h
arch/s390/include/asm/dma.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/sections.h
arch/s390/include/asm/set_memory.h
arch/s390/include/asm/setup.h
arch/s390/include/asm/uv.h
arch/s390/include/uapi/asm/kvm.h
arch/s390/kernel/early.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/setup.c
arch/s390/kernel/uv.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/pv.c
arch/s390/mm/dump_pagetables.c
arch/s390/mm/fault.c
arch/s390/mm/init.c
arch/s390/mm/pageattr.c
arch/s390/mm/vmem.c
arch/s390/net/bpf_jit_comp.c
arch/sh/boards/mach-ap325rxa/setup.c
arch/sh/boards/mach-ecovec24/setup.c
arch/sh/boards/mach-kfr2r09/setup.c
arch/sh/boards/mach-migor/setup.c
arch/sh/boards/mach-se/7724/setup.c
arch/sh/drivers/push-switch.c
arch/sparc/include/asm/Kbuild
arch/sparc/include/asm/ide.h [deleted file]
arch/sparc/kernel/entry.S
arch/sparc/kernel/head_32.S
arch/sparc/kernel/head_64.S
arch/sparc/lib/U1memcpy.S
arch/sparc/lib/VISsave.S
arch/sparc/lib/ashldi3.S
arch/sparc/lib/ashrdi3.S
arch/sparc/lib/atomic_64.S
arch/sparc/lib/bitops.S
arch/sparc/lib/blockops.S
arch/sparc/lib/bzero.S
arch/sparc/lib/checksum_32.S
arch/sparc/lib/checksum_64.S
arch/sparc/lib/clear_page.S
arch/sparc/lib/copy_in_user.S
arch/sparc/lib/copy_page.S
arch/sparc/lib/copy_user.S
arch/sparc/lib/csum_copy.S
arch/sparc/lib/divdi3.S
arch/sparc/lib/ffs.S
arch/sparc/lib/fls.S
arch/sparc/lib/fls64.S
arch/sparc/lib/hweight.S
arch/sparc/lib/ipcsum.S
arch/sparc/lib/locks.S
arch/sparc/lib/lshrdi3.S
arch/sparc/lib/mcount.S
arch/sparc/lib/memcmp.S
arch/sparc/lib/memcpy.S
arch/sparc/lib/memmove.S
arch/sparc/lib/memscan_32.S
arch/sparc/lib/memscan_64.S
arch/sparc/lib/memset.S
arch/sparc/lib/muldi3.S
arch/sparc/lib/multi3.S
arch/sparc/lib/strlen.S
arch/sparc/lib/strncmp_32.S
arch/sparc/lib/strncmp_64.S
arch/sparc/lib/xor.S
arch/sparc/mm/tlb.c
arch/x86/Makefile
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kexec.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_page_track.h
arch/x86/include/asm/reboot.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/virtext.h [deleted file]
arch/x86/include/asm/vmx.h
arch/x86/kernel/crash.c
arch/x86/kernel/reboot.c
arch/x86/kvm/Kconfig
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/emulate.c
arch/x86/kvm/governed_features.h [new file with mode: 0644]
arch/x86/kvm/hyperv.c
arch/x86/kvm/kvm_emulate.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/mmu/page_track.c
arch/x86/kvm/mmu/page_track.h [new file with mode: 0644]
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/mmu/spte.c
arch/x86/kvm/mmu/spte.h
arch/x86/kvm/mmu/tdp_iter.c
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/pmu.c
arch/x86/kvm/reverse_cpuid.h
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/hyperv.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/nested.h
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/xtensa/Kconfig
arch/xtensa/include/asm/core.h
arch/xtensa/include/asm/mtd-xip.h [new file with mode: 0644]
arch/xtensa/include/asm/sections.h
arch/xtensa/kernel/perf_event.c
arch/xtensa/kernel/setup.c
arch/xtensa/kernel/vmlinux.lds.S
block/blk-map.c
block/blk-throttle.c
block/blk-throttle.h
block/fops.c
block/ioctl.c
drivers/Kconfig
drivers/Makefile
drivers/accel/ivpu/ivpu_jsm_msg.c
drivers/acpi/thermal.c
drivers/ata/ahci.c
drivers/ata/ahci_ceva.c
drivers/ata/ahci_dwc.c
drivers/ata/ahci_mtk.c
drivers/ata/ahci_mvebu.c
drivers/ata/ahci_octeon.c
drivers/ata/ahci_qoriq.c
drivers/ata/ahci_seattle.c
drivers/ata/ahci_sunxi.c
drivers/ata/ahci_tegra.c
drivers/ata/ahci_xgene.c
drivers/ata/libahci.c
drivers/ata/libahci_platform.c
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/ata/libata-sata.c
drivers/ata/libata-scsi.c
drivers/ata/libata-sff.c
drivers/ata/libata.h
drivers/ata/pata_arasan_cf.c
drivers/ata/pata_buddha.c
drivers/ata/pata_ep93xx.c
drivers/ata/pata_falcon.c
drivers/ata/pata_ftide010.c
drivers/ata/pata_gayle.c
drivers/ata/pata_imx.c
drivers/ata/pata_ixp4xx_cf.c
drivers/ata/pata_mpc52xx.c
drivers/ata/pata_pxa.c
drivers/ata/pata_rb532_cf.c
drivers/ata/pata_sl82c105.c
drivers/ata/sata_dwc_460ex.c
drivers/ata/sata_fsl.c
drivers/ata/sata_gemini.c
drivers/ata/sata_highbank.c
drivers/ata/sata_inic162x.c
drivers/ata/sata_mv.c
drivers/ata/sata_nv.c
drivers/ata/sata_rcar.c
drivers/ata/sata_sil24.c
drivers/ata/sata_sx4.c
drivers/block/drbd/drbd_main.c
drivers/block/null_blk/main.c
drivers/block/rbd.c
drivers/cache/Kconfig [new file with mode: 0644]
drivers/cache/Makefile [new file with mode: 0644]
drivers/cache/ax45mp_cache.c [new file with mode: 0644]
drivers/char/tpm/tpm_crb.c
drivers/counter/Kconfig
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/pcc-cpufreq.c
drivers/firmware/efi/libstub/Makefile
drivers/firmware/efi/libstub/arm64-stub.c
drivers/firmware/efi/libstub/efistub.h
drivers/firmware/efi/libstub/kaslr.c [new file with mode: 0644]
drivers/firmware/efi/libstub/riscv-stub.c
drivers/gpio/gpio-zynq.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
drivers/gpu/drm/amd/display/dc/Makefile
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_init.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
drivers/gpu/drm/amd/display/modules/freesync/freesync.c
drivers/gpu/drm/amd/include/amd_shared.h
drivers/gpu/drm/amd/include/atomfirmware.h
drivers/gpu/drm/amd/include/discovery.h
drivers/gpu/drm/amd/pm/amdgpu_pm.c
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_pmfw.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gvt/gtt.c
drivers/gpu/drm/i915/gvt/gtt.h
drivers/gpu/drm/i915/gvt/gvt.h
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/gvt/page_track.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/nouveau/dispnv04/crtc.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_chan.c
drivers/gpu/drm/nouveau/nouveau_dmem.c
drivers/gpu/drm/nouveau/nouveau_exec.c
drivers/gpu/drm/nouveau/nouveau_fence.c
drivers/gpu/drm/nouveau/nouveau_fence.h
drivers/gpu/drm/nouveau/nouveau_gem.c
drivers/hwspinlock/omap_hwspinlock.c
drivers/hwspinlock/qcom_hwspinlock.c
drivers/hwspinlock/u8500_hsem.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-ali15x3.c
drivers/i2c/busses/i2c-at91-core.c
drivers/i2c/busses/i2c-at91-master.c
drivers/i2c/busses/i2c-au1550.c
drivers/i2c/busses/i2c-bcm-iproc.c
drivers/i2c/busses/i2c-bcm2835.c
drivers/i2c/busses/i2c-brcmstb.c
drivers/i2c/busses/i2c-cpm.c
drivers/i2c/busses/i2c-davinci.c
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-designware-platdrv.c
drivers/i2c/busses/i2c-dln2.c
drivers/i2c/busses/i2c-emev2.c
drivers/i2c/busses/i2c-exynos5.c
drivers/i2c/busses/i2c-gxp.c
drivers/i2c/busses/i2c-hisi.c
drivers/i2c/busses/i2c-hix5hd2.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-ibm_iic.c
drivers/i2c/busses/i2c-img-scb.c
drivers/i2c/busses/i2c-imx-lpi2c.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-jz4780.c
drivers/i2c/busses/i2c-kempld.c
drivers/i2c/busses/i2c-lpc2k.c
drivers/i2c/busses/i2c-meson.c
drivers/i2c/busses/i2c-microchip-corei2c.c
drivers/i2c/busses/i2c-mlxbf.c
drivers/i2c/busses/i2c-mlxcpld.c
drivers/i2c/busses/i2c-mpc.c
drivers/i2c/busses/i2c-mt65xx.c
drivers/i2c/busses/i2c-mt7621.c
drivers/i2c/busses/i2c-mxs.c
drivers/i2c/busses/i2c-nforce2.c
drivers/i2c/busses/i2c-nomadik.c
drivers/i2c/busses/i2c-npcm7xx.c
drivers/i2c/busses/i2c-ocores.c
drivers/i2c/busses/i2c-owl.c
drivers/i2c/busses/i2c-pca-platform.c
drivers/i2c/busses/i2c-pnx.c
drivers/i2c/busses/i2c-pxa-pci.c
drivers/i2c/busses/i2c-pxa.c
drivers/i2c/busses/i2c-qcom-cci.c
drivers/i2c/busses/i2c-qcom-geni.c
drivers/i2c/busses/i2c-qup.c
drivers/i2c/busses/i2c-rcar.c
drivers/i2c/busses/i2c-riic.c
drivers/i2c/busses/i2c-s3c2410.c
drivers/i2c/busses/i2c-sh_mobile.c
drivers/i2c/busses/i2c-sis5595.c
drivers/i2c/busses/i2c-sprd.c
drivers/i2c/busses/i2c-st.c
drivers/i2c/busses/i2c-stm32f4.c
drivers/i2c/busses/i2c-stm32f7.c
drivers/i2c/busses/i2c-synquacer.c
drivers/i2c/busses/i2c-tegra-bpmp.c
drivers/i2c/busses/i2c-tegra.c
drivers/i2c/busses/i2c-tiny-usb.c
drivers/i2c/busses/i2c-virtio.c
drivers/i2c/busses/i2c-xlp9xx.c
drivers/i2c/muxes/Kconfig
drivers/i2c/muxes/i2c-mux-gpmux.c
drivers/i2c/muxes/i2c-mux-ltc4306.c
drivers/i2c/muxes/i2c-mux-pca954x.c
drivers/i3c/master.c
drivers/i3c/master/ast2600-i3c-master.c
drivers/i3c/master/i3c-master-cdns.c
drivers/i3c/master/mipi-i3c-hci/cmd_v1.c
drivers/i3c/master/svc-i3c-master.c
drivers/input/gameport/Kconfig
drivers/input/gameport/gameport.c
drivers/input/joystick/xpad.c
drivers/input/keyboard/adp5588-keys.c
drivers/input/keyboard/amikbd.c
drivers/input/keyboard/bcm-keypad.c
drivers/input/keyboard/gpio_keys.c
drivers/input/keyboard/gpio_keys_polled.c
drivers/input/keyboard/lm8323.c
drivers/input/keyboard/lm8333.c
drivers/input/keyboard/lpc32xx-keys.c
drivers/input/keyboard/mcs_touchkey.c
drivers/input/keyboard/nomadik-ske-keypad.c
drivers/input/keyboard/nspire-keypad.c
drivers/input/keyboard/omap4-keypad.c
drivers/input/keyboard/opencores-kbd.c
drivers/input/keyboard/pinephone-keyboard.c
drivers/input/keyboard/pxa27x_keypad.c
drivers/input/keyboard/qt1070.c
drivers/input/keyboard/qt2160.c
drivers/input/keyboard/sun4i-lradc-keys.c
drivers/input/keyboard/tca6416-keypad.c
drivers/input/keyboard/tegra-kbc.c
drivers/input/keyboard/tm2-touchkey.c
drivers/input/misc/Kconfig
drivers/input/misc/cpcap-pwrbutton.c
drivers/input/misc/da9063_onkey.c
drivers/input/misc/gpio-vibra.c
drivers/input/misc/iqs269a.c
drivers/input/misc/iqs626a.c
drivers/input/misc/iqs7222.c
drivers/input/misc/mma8450.c
drivers/input/misc/pm8941-pwrkey.c
drivers/input/misc/pm8xxx-vibrator.c
drivers/input/misc/pmic8xxx-pwrkey.c
drivers/input/misc/pwm-beeper.c
drivers/input/misc/pwm-vibra.c
drivers/input/misc/rotary_encoder.c
drivers/input/misc/sparcspkr.c
drivers/input/mouse/elan_i2c_core.c
drivers/input/mouse/psmouse-smbus.c
drivers/input/serio/apbps2.c
drivers/input/serio/i8042-acpipnpio.h
drivers/input/serio/i8042-sparcio.h
drivers/input/serio/rpckbd.c
drivers/input/serio/xilinx_ps2.c
drivers/input/touchscreen/Kconfig
drivers/input/touchscreen/Makefile
drivers/input/touchscreen/bu21013_ts.c
drivers/input/touchscreen/bu21029_ts.c
drivers/input/touchscreen/chipone_icn8318.c
drivers/input/touchscreen/cy8ctma140.c
drivers/input/touchscreen/cyttsp5.c
drivers/input/touchscreen/edt-ft5x06.c
drivers/input/touchscreen/ektf2127.c
drivers/input/touchscreen/elants_i2c.c
drivers/input/touchscreen/exc3000.c
drivers/input/touchscreen/goodix.c
drivers/input/touchscreen/ili210x.c
drivers/input/touchscreen/iqs5xx.c
drivers/input/touchscreen/iqs7211.c [new file with mode: 0644]
drivers/input/touchscreen/lpc32xx_ts.c
drivers/input/touchscreen/melfas_mip4.c
drivers/input/touchscreen/mms114.c
drivers/input/touchscreen/novatek-nvt-ts.c
drivers/input/touchscreen/pixcir_i2c_ts.c
drivers/input/touchscreen/raydium_i2c_ts.c
drivers/input/touchscreen/resistive-adc-touch.c
drivers/input/touchscreen/silead.c
drivers/input/touchscreen/sis_i2c.c
drivers/input/touchscreen/surface3_spi.c
drivers/input/touchscreen/sx8654.c
drivers/input/touchscreen/ti_am335x_tsc.c
drivers/leds/Kconfig
drivers/leds/Makefile
drivers/leds/blink/Kconfig
drivers/leds/flash/Kconfig
drivers/leds/flash/leds-qcom-flash.c
drivers/leds/led-class-multicolor.c
drivers/leds/led-class.c
drivers/leds/led-core.c
drivers/leds/leds-an30259a.c
drivers/leds/leds-ariel.c
drivers/leds/leds-aw200xx.c
drivers/leds/leds-aw2013.c
drivers/leds/leds-cpcap.c
drivers/leds/leds-cr0014114.c
drivers/leds/leds-ip30.c
drivers/leds/leds-is31fl32xx.c
drivers/leds/leds-lp5521.c
drivers/leds/leds-lp5523.c
drivers/leds/leds-lp5562.c
drivers/leds/leds-lp8501.c
drivers/leds/leds-mlxreg.c
drivers/leds/leds-ns2.c
drivers/leds/leds-pca9532.c
drivers/leds/leds-pca995x.c [new file with mode: 0644]
drivers/leds/leds-pm8058.c
drivers/leds/leds-pwm.c
drivers/leds/leds-spi-byte.c
drivers/leds/leds-syscon.c
drivers/leds/leds-ti-lmu-common.c
drivers/leds/leds-tlc591xx.c
drivers/leds/leds-turris-omnia.c
drivers/leds/rgb/Kconfig
drivers/leds/rgb/Makefile
drivers/leds/rgb/leds-group-multicolor.c [new file with mode: 0644]
drivers/leds/rgb/leds-qcom-lpg.c
drivers/leds/simple/Kconfig
drivers/leds/simple/Makefile
drivers/leds/simple/simatic-ipc-leds-gpio-core.c
drivers/leds/simple/simatic-ipc-leds-gpio-elkhartlake.c [new file with mode: 0644]
drivers/leds/simple/simatic-ipc-leds-gpio.h
drivers/leds/trigger/ledtrig-netdev.c
drivers/leds/trigger/ledtrig-tty.c
drivers/leds/uleds.c
drivers/mailbox/arm_mhu.c
drivers/mailbox/arm_mhu_db.c
drivers/mailbox/bcm-flexrm-mailbox.c
drivers/mailbox/bcm-pdc-mailbox.c
drivers/mailbox/hi3660-mailbox.c
drivers/mailbox/hi6220-mailbox.c
drivers/mailbox/imx-mailbox.c
drivers/mailbox/mailbox-mpfs.c
drivers/mailbox/mailbox-test.c
drivers/mailbox/mailbox.c
drivers/mailbox/mtk-adsp-mailbox.c
drivers/mailbox/mtk-cmdq-mailbox.c
drivers/mailbox/omap-mailbox.c
drivers/mailbox/platform_mhu.c
drivers/mailbox/qcom-ipcc.c
drivers/mailbox/rockchip-mailbox.c
drivers/mailbox/sprd-mailbox.c
drivers/mailbox/stm32-ipcc.c
drivers/mailbox/tegra-hsp.c
drivers/mailbox/ti-msgmgr.c
drivers/mailbox/zynqmp-ipi-mailbox.c
drivers/media/dvb-frontends/ascot2e.c
drivers/media/dvb-frontends/atbm8830.c
drivers/media/dvb-frontends/au8522_dig.c
drivers/media/dvb-frontends/bcm3510.c
drivers/media/dvb-frontends/cx22700.c
drivers/media/dvb-frontends/cx22702.c
drivers/media/dvb-frontends/cx24110.c
drivers/media/dvb-frontends/cx24113.c
drivers/media/dvb-frontends/cx24116.c
drivers/media/dvb-frontends/cx24120.c
drivers/media/dvb-frontends/cx24123.c
drivers/media/dvb-frontends/cxd2820r_core.c
drivers/media/dvb-frontends/cxd2841er.c
drivers/media/dvb-frontends/cxd2880/cxd2880_top.c
drivers/media/dvb-frontends/dib0070.c
drivers/media/dvb-frontends/dib0090.c
drivers/media/dvb-frontends/dib3000mb.c
drivers/media/dvb-frontends/dib3000mc.c
drivers/media/dvb-frontends/dib7000m.c
drivers/media/dvb-frontends/dib7000p.c
drivers/media/dvb-frontends/dib8000.c
drivers/media/dvb-frontends/dib9000.c
drivers/media/dvb-frontends/drx39xyj/drxj.c
drivers/media/dvb-frontends/drxd_hard.c
drivers/media/dvb-frontends/drxk_hard.c
drivers/media/dvb-frontends/ds3000.c
drivers/media/dvb-frontends/dvb-pll.c
drivers/media/dvb-frontends/ec100.c
drivers/media/dvb-frontends/helene.c
drivers/media/dvb-frontends/horus3a.c
drivers/media/dvb-frontends/isl6405.c
drivers/media/dvb-frontends/isl6421.c
drivers/media/dvb-frontends/isl6423.c
drivers/media/dvb-frontends/itd1000.c
drivers/media/dvb-frontends/ix2505v.c
drivers/media/dvb-frontends/l64781.c
drivers/media/dvb-frontends/lg2160.c
drivers/media/dvb-frontends/lgdt3305.c
drivers/media/dvb-frontends/lgdt3306a.c
drivers/media/dvb-frontends/lgdt330x.c
drivers/media/dvb-frontends/lgs8gxx.c
drivers/media/dvb-frontends/lnbh25.c
drivers/media/dvb-frontends/lnbp21.c
drivers/media/dvb-frontends/lnbp22.c
drivers/media/dvb-frontends/m88ds3103.c
drivers/media/dvb-frontends/m88rs2000.c
drivers/media/dvb-frontends/mb86a16.c
drivers/media/dvb-frontends/mb86a20s.c
drivers/media/dvb-frontends/mt312.c
drivers/media/dvb-frontends/mt352.c
drivers/media/dvb-frontends/nxt200x.c
drivers/media/dvb-frontends/nxt6000.c
drivers/media/dvb-frontends/or51132.c
drivers/media/dvb-frontends/or51211.c
drivers/media/dvb-frontends/s5h1409.c
drivers/media/dvb-frontends/s5h1411.c
drivers/media/dvb-frontends/s5h1420.c
drivers/media/dvb-frontends/s5h1432.c
drivers/media/dvb-frontends/s921.c
drivers/media/dvb-frontends/si21xx.c
drivers/media/dvb-frontends/sp887x.c
drivers/media/dvb-frontends/stb0899_drv.c
drivers/media/dvb-frontends/stb6000.c
drivers/media/dvb-frontends/stb6100.c
drivers/media/dvb-frontends/stv0288.c
drivers/media/dvb-frontends/stv0297.c
drivers/media/dvb-frontends/stv0299.c
drivers/media/dvb-frontends/stv0367.c
drivers/media/dvb-frontends/stv0900_core.c
drivers/media/dvb-frontends/stv090x.c
drivers/media/dvb-frontends/stv6110.c
drivers/media/dvb-frontends/stv6110x.c
drivers/media/dvb-frontends/tda10021.c
drivers/media/dvb-frontends/tda10023.c
drivers/media/dvb-frontends/tda10048.c
drivers/media/dvb-frontends/tda1004x.c
drivers/media/dvb-frontends/tda10086.c
drivers/media/dvb-frontends/tda665x.c
drivers/media/dvb-frontends/tda8083.c
drivers/media/dvb-frontends/tda8261.c
drivers/media/dvb-frontends/tda826x.c
drivers/media/dvb-frontends/ts2020.c
drivers/media/dvb-frontends/tua6100.c
drivers/media/dvb-frontends/ves1820.c
drivers/media/dvb-frontends/ves1x93.c
drivers/media/dvb-frontends/zl10036.c
drivers/media/dvb-frontends/zl10039.c
drivers/media/dvb-frontends/zl10353.c
drivers/media/pci/bt8xx/dst.c
drivers/media/pci/bt8xx/dst_ca.c
drivers/media/pci/ddbridge/ddbridge-dummy-fe.c
drivers/media/tuners/fc0011.c
drivers/media/tuners/fc0012.c
drivers/media/tuners/fc0013.c
drivers/media/tuners/max2165.c
drivers/media/tuners/mc44s803.c
drivers/media/tuners/mt2060.c
drivers/media/tuners/mt2131.c
drivers/media/tuners/mt2266.c
drivers/media/tuners/mxl5005s.c
drivers/media/tuners/qt1010.c
drivers/media/tuners/tda18218.c
drivers/media/tuners/xc2028.c
drivers/media/tuners/xc4000.c
drivers/media/tuners/xc5000.c
drivers/mfd/Kconfig
drivers/mfd/ab8500-core.c
drivers/mfd/acer-ec-a500.c
drivers/mfd/act8945a.c
drivers/mfd/altera-a10sr.c
drivers/mfd/altera-sysmgr.c
drivers/mfd/arizona-core.c
drivers/mfd/atc260x-core.c
drivers/mfd/atmel-hlcdc.c
drivers/mfd/axp20x.c
drivers/mfd/bcm590xx.c
drivers/mfd/cros_ec_dev.c
drivers/mfd/cs47l15-tables.c
drivers/mfd/cs47l24-tables.c
drivers/mfd/cs47l35-tables.c
drivers/mfd/cs47l85-tables.c
drivers/mfd/cs47l90-tables.c
drivers/mfd/cs47l92-tables.c
drivers/mfd/da9052-i2c.c
drivers/mfd/da9055-i2c.c
drivers/mfd/da9062-core.c
drivers/mfd/exynos-lpass.c
drivers/mfd/hi6421-pmic-core.c
drivers/mfd/hi655x-pmic.c
drivers/mfd/ipaq-micro.c
drivers/mfd/iqs62x.c
drivers/mfd/lochnagar-i2c.c
drivers/mfd/lp873x.c
drivers/mfd/lp87565.c
drivers/mfd/madera-i2c.c
drivers/mfd/madera-spi.c
drivers/mfd/max14577.c
drivers/mfd/max77541.c
drivers/mfd/max77620.c
drivers/mfd/max77686.c
drivers/mfd/max77843.c
drivers/mfd/max8907.c
drivers/mfd/max8925-core.c
drivers/mfd/max8997.c
drivers/mfd/max8998.c
drivers/mfd/mc13xxx-i2c.c
drivers/mfd/mt6358-irq.c
drivers/mfd/mt6397-core.c
drivers/mfd/mt6397-irq.c
drivers/mfd/mxs-lradc.c
drivers/mfd/omap-usb-host.c
drivers/mfd/omap-usb-tll.c
drivers/mfd/palmas.c
drivers/mfd/qcom-pm8008.c
drivers/mfd/qcom-pm8xxx.c
drivers/mfd/rave-sp.c
drivers/mfd/rk8xx-core.c
drivers/mfd/rn5t618.c
drivers/mfd/rohm-bd71828.c
drivers/mfd/rohm-bd718x7.c
drivers/mfd/rohm-bd9576.c
drivers/mfd/rsmu_i2c.c
drivers/mfd/rsmu_spi.c
drivers/mfd/rt5033.c
drivers/mfd/rz-mtu3.c
drivers/mfd/sec-core.c
drivers/mfd/sprd-sc27xx-spi.c
drivers/mfd/ssbi.c
drivers/mfd/stm32-lptimer.c
drivers/mfd/stm32-timers.c
drivers/mfd/stmpe-i2c.c
drivers/mfd/stpmic1.c
drivers/mfd/sun4i-gpadc.c
drivers/mfd/tc3589x.c
drivers/mfd/ti-lmu.c
drivers/mfd/ti_am335x_tscadc.c
drivers/mfd/tps6507x.c
drivers/mfd/tps65090.c
drivers/mfd/tps65217.c
drivers/mfd/tps65218.c
drivers/mfd/tps6594-core.c
drivers/mfd/twl6040.c
drivers/mfd/wm5102-tables.c
drivers/mfd/wm5110-tables.c
drivers/mfd/wm831x-core.c
drivers/mfd/wm831x-i2c.c
drivers/mfd/wm831x-spi.c
drivers/mfd/wm8994-core.c
drivers/mfd/wm8994-regmap.c
drivers/mfd/wm8997-tables.c
drivers/mfd/wm8998-tables.c
drivers/mtd/chips/cfi_cmdset_0002.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/sja1105/sja1105.h
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/dsa/sja1105/sja1105_spi.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/google/gve/gve_rx_dqo.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igbvf/igbvf.h
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/sfc/rx.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/macsec.c
drivers/net/phy/micrel.c
drivers/net/veth.c
drivers/nfc/nxp-nci/i2c.c
drivers/ntb/hw/amd/ntb_hw_amd.c
drivers/ntb/ntb_transport.c
drivers/ntb/test/ntb_perf.c
drivers/ntb/test/ntb_tool.c
drivers/pci/Kconfig
drivers/pci/probe.c
drivers/pci/quirks.c
drivers/perf/arm_pmuv3.c
drivers/perf/cxl_pmu.c
drivers/power/supply/power_supply_core.c
drivers/powercap/intel_rapl_common.c
drivers/pwm/Kconfig
drivers/pwm/core.c
drivers/pwm/pwm-apple.c
drivers/pwm/pwm-atmel-hlcdc.c
drivers/pwm/pwm-atmel-tcb.c
drivers/pwm/pwm-atmel.c
drivers/pwm/pwm-bcm-kona.c
drivers/pwm/pwm-berlin.c
drivers/pwm/pwm-crc.c
drivers/pwm/pwm-cros-ec.c
drivers/pwm/pwm-fsl-ftm.c
drivers/pwm/pwm-hibvt.c
drivers/pwm/pwm-imx1.c
drivers/pwm/pwm-jz4740.c
drivers/pwm/pwm-lp3943.c
drivers/pwm/pwm-lpc18xx-sct.c
drivers/pwm/pwm-lpc32xx.c
drivers/pwm/pwm-mediatek.c
drivers/pwm/pwm-meson.c
drivers/pwm/pwm-microchip-core.c
drivers/pwm/pwm-mtk-disp.c
drivers/pwm/pwm-ntxec.c
drivers/pwm/pwm-pxa.c
drivers/pwm/pwm-rockchip.c
drivers/pwm/pwm-rz-mtu3.c
drivers/pwm/pwm-sifive.c
drivers/pwm/pwm-sl28cpld.c
drivers/pwm/pwm-sprd.c
drivers/pwm/pwm-stm32.c
drivers/pwm/pwm-stmpe.c
drivers/pwm/pwm-sun4i.c
drivers/pwm/pwm-sunplus.c
drivers/pwm/pwm-tegra.c
drivers/pwm/pwm-tiecap.c
drivers/pwm/pwm-tiehrpwm.c
drivers/pwm/pwm-visconti.c
drivers/pwm/pwm-vt8500.c
drivers/regulator/tps6287x-regulator.c
drivers/regulator/tps6594-regulator.c
drivers/remoteproc/imx_dsp_rproc.c
drivers/remoteproc/imx_rproc.c
drivers/remoteproc/imx_rproc.h
drivers/remoteproc/omap_remoteproc.c
drivers/remoteproc/pru_rproc.c
drivers/remoteproc/qcom_common.c
drivers/remoteproc/qcom_q6v5_adsp.c
drivers/remoteproc/qcom_q6v5_mss.c
drivers/remoteproc/qcom_q6v5_pas.c
drivers/remoteproc/qcom_q6v5_wcss.c
drivers/remoteproc/qcom_sysmon.c
drivers/remoteproc/qcom_wcnss.c
drivers/remoteproc/qcom_wcnss_iris.c
drivers/remoteproc/rcar_rproc.c
drivers/remoteproc/remoteproc_coredump.c
drivers/remoteproc/remoteproc_internal.h
drivers/remoteproc/st_slim_rproc.c
drivers/remoteproc/stm32_rproc.c
drivers/remoteproc/ti_k3_dsp_remoteproc.c
drivers/remoteproc/ti_k3_r5_remoteproc.c
drivers/remoteproc/wkup_m3_rproc.c
drivers/rpmsg/qcom_glink_native.c
drivers/rpmsg/rpmsg_char.c
drivers/rpmsg/rpmsg_core.c
drivers/rpmsg/rpmsg_internal.h
drivers/rtc/Kconfig
drivers/rtc/interface.c
drivers/rtc/rtc-abx80x.c
drivers/rtc/rtc-armada38x.c
drivers/rtc/rtc-aspeed.c
drivers/rtc/rtc-at91rm9200.c
drivers/rtc/rtc-at91sam9.c
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-cros-ec.c
drivers/rtc/rtc-da9063.c
drivers/rtc/rtc-ds1305.c
drivers/rtc/rtc-ds1307.c
drivers/rtc/rtc-ds1742.c
drivers/rtc/rtc-ds2404.c
drivers/rtc/rtc-fsl-ftm-alarm.c
drivers/rtc/rtc-isl12022.c
drivers/rtc/rtc-isl12026.c
drivers/rtc/rtc-isl1208.c
drivers/rtc/rtc-jz4740.c
drivers/rtc/rtc-lpc24xx.c
drivers/rtc/rtc-m41t80.c
drivers/rtc/rtc-m48t86.c
drivers/rtc/rtc-mpc5121.c
drivers/rtc/rtc-mt6397.c
drivers/rtc/rtc-mt7622.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-nct3018y.c
drivers/rtc/rtc-omap.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-pcf85063.c
drivers/rtc/rtc-pcf85363.c
drivers/rtc/rtc-pxa.c
drivers/rtc/rtc-rs5c372.c
drivers/rtc/rtc-rv3028.c
drivers/rtc/rtc-rv3032.c
drivers/rtc/rtc-rv8803.c
drivers/rtc/rtc-rx6110.c
drivers/rtc/rtc-rx8581.c
drivers/rtc/rtc-rzn1.c
drivers/rtc/rtc-s3c.c
drivers/rtc/rtc-stm32.c
drivers/rtc/rtc-stmp3xxx.c
drivers/rtc/rtc-sun6i.c
drivers/rtc/rtc-sunplus.c
drivers/rtc/rtc-sunxi.c
drivers/rtc/rtc-ti-k3.c
drivers/rtc/rtc-tps6586x.c
drivers/rtc/rtc-tps65910.c
drivers/rtc/rtc-twl.c
drivers/rtc/rtc-wm8350.c
drivers/s390/block/dasd_devmap.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_int.h
drivers/s390/block/dcssblk.c
drivers/s390/char/monreader.c
drivers/s390/cio/airq.c
drivers/s390/crypto/zcrypt_api.c
drivers/s390/virtio/virtio_ccw.c
drivers/scsi/aacraid/commsup.c
drivers/scsi/fnic/fnic.h
drivers/scsi/fnic/fnic_scsi.c
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/libsas/sas_ata.c
drivers/scsi/libsas/sas_discover.c
drivers/scsi/libsas/sas_scsi_host.c
drivers/scsi/mpt3sas/mpi/mpi2.h
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mvumi.c
drivers/scsi/qedf/qedf.h
drivers/scsi/qedi/qedi_gbl.h
drivers/scsi/qla2xxx/qla_attr.c
drivers/scsi/qla2xxx/qla_dbg.c
drivers/scsi/qla2xxx/qla_dbg.h
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_dfs.c
drivers/scsi/qla2xxx/qla_gbl.h
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_inline.h
drivers/scsi/qla2xxx/qla_iocb.c
drivers/scsi/qla2xxx/qla_isr.c
drivers/scsi/qla2xxx/qla_mbx.c
drivers/scsi/qla2xxx/qla_nvme.c
drivers/scsi/qla2xxx/qla_nvme.h
drivers/scsi/qla2xxx/qla_nx.h
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/qla2xxx/qla_version.h
drivers/scsi/scsi_debugfs.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_priv.h
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/sd.c
drivers/scsi/smartpqi/smartpqi.h
drivers/scsi/smartpqi/smartpqi_init.c
drivers/scsi/st.c
drivers/scsi/storvsc_drv.c
drivers/scsi/virtio_scsi.c
drivers/scsi/xen-scsifront.c
drivers/soc/renesas/Kconfig
drivers/spi/spi-sun6i.c
drivers/staging/greybus/pwm.c
drivers/staging/media/av7110/sp8870.c
drivers/thermal/Kconfig
drivers/thermal/Makefile
drivers/thermal/armada_thermal.c
drivers/thermal/broadcom/brcmstb_thermal.c
drivers/thermal/broadcom/sr-thermal.c
drivers/thermal/db8500_thermal.c
drivers/thermal/dove_thermal.c
drivers/thermal/imx8mm_thermal.c
drivers/thermal/intel/int340x_thermal/int3400_thermal.c
drivers/thermal/k3_bandgap.c
drivers/thermal/k3_j72xx_bandgap.c
drivers/thermal/kirkwood_thermal.c
drivers/thermal/loongson2_thermal.c [new file with mode: 0644]
drivers/thermal/max77620_thermal.c
drivers/thermal/mediatek/auxadc_thermal.c
drivers/thermal/mediatek/lvts_thermal.c
drivers/thermal/qcom/tsens-v0_1.c
drivers/thermal/qcom/tsens-v1.c
drivers/thermal/samsung/exynos_tmu.c
drivers/thermal/spear_thermal.c
drivers/thermal/sun8i_thermal.c
drivers/thermal/tegra/tegra-bpmp-thermal.c
drivers/thermal/thermal-generic-adc.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_core.h
drivers/thermal/thermal_helpers.c
drivers/thermal/thermal_trip.c
drivers/thermal/ti-soc-thermal/ti-bandgap.c
drivers/thermal/ti-soc-thermal/ti-thermal-common.c
drivers/ufs/core/ufs_bsg.c
drivers/ufs/core/ufshcd.c
drivers/usb/dwc3/dwc3-octeon.c
drivers/video/backlight/gpio_backlight.c
drivers/video/backlight/led_bl.c
drivers/video/backlight/lp855x_bl.c
drivers/video/backlight/qcom-wled.c
drivers/watchdog/Kconfig
drivers/watchdog/Makefile
drivers/watchdog/armada_37xx_wdt.c
drivers/watchdog/at91rm9200_wdt.c
drivers/watchdog/cpwd.c
drivers/watchdog/ftwdt010_wdt.c
drivers/watchdog/gef_wdt.c
drivers/watchdog/imx2_wdt.c
drivers/watchdog/imx7ulp_wdt.c
drivers/watchdog/intel-mid_wdt.c
drivers/watchdog/lantiq_wdt.c
drivers/watchdog/loongson1_wdt.c
drivers/watchdog/marvell_gti_wdt.c [new file with mode: 0644]
drivers/watchdog/menz69_wdt.c
drivers/watchdog/meson_gxbb_wdt.c
drivers/watchdog/meson_wdt.c
drivers/watchdog/mpc8xxx_wdt.c
drivers/watchdog/mtk_wdt.c
drivers/watchdog/of_xilinx_wdt.c
drivers/watchdog/pic32-dmt.c
drivers/watchdog/pic32-wdt.c
drivers/watchdog/pika_wdt.c
drivers/watchdog/pm8916_wdt.c
drivers/watchdog/qcom-wdt.c
drivers/watchdog/rave-sp-wdt.c
drivers/watchdog/riowd.c
drivers/watchdog/rti_wdt.c
drivers/watchdog/rza_wdt.c
drivers/watchdog/rzg2l_wdt.c
drivers/watchdog/s3c2410_wdt.c
drivers/watchdog/sama5d4_wdt.c
drivers/watchdog/sbsa_gwdt.c
drivers/watchdog/starfive-wdt.c
drivers/watchdog/stm32_iwdg.c
drivers/watchdog/sunxi_wdt.c
drivers/watchdog/watchdog_core.c
drivers/watchdog/xilinx_wwdt.c
fs/ceph/Makefile
fs/ceph/acl.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/crypto.c [new file with mode: 0644]
fs/ceph/crypto.h [new file with mode: 0644]
fs/ceph/dir.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/ioctl.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/quota.c
fs/ceph/snap.c
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/fuse/dir.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/fuse/readdir.c
fs/gfs2/aops.c
fs/gfs2/bmap.c
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/lock_dlm.c
fs/gfs2/log.c
fs/gfs2/lops.c
fs/gfs2/main.c
fs/gfs2/ops_fstype.c
fs/gfs2/quota.c
fs/gfs2/recovery.c
fs/gfs2/recovery.h
fs/gfs2/super.c
fs/gfs2/super.h
fs/gfs2/sys.c
fs/gfs2/util.c
fs/nls/Kconfig
fs/ntfs3/super.c
fs/proc/task_mmu.c
fs/smb/client/cached_dir.c
fs/smb/client/cached_dir.h
fs/smb/client/cifsfs.c
fs/smb/client/cifsfs.h
fs/smb/client/cifsglob.h
fs/smb/client/connect.c
fs/smb/client/fs_context.c
fs/smb/client/fs_context.h
fs/smb/client/fscache.c
fs/smb/client/smb2ops.c
fs/smb/client/trace.h
fs/smb/common/smb2pdu.h
fs/smb/server/Kconfig
fs/smb/server/server.c
fs/stat.c
include/asm-generic/ide_iops.h [deleted file]
include/kvm/arm_pmu.h
include/linux/audit.h
include/linux/bpf.h
include/linux/ceph/ceph_fs.h
include/linux/ceph/messenger.h
include/linux/ceph/osd_client.h
include/linux/ceph/rados.h
include/linux/console.h
include/linux/cpufreq.h
include/linux/export.h
include/linux/gameport.h
include/linux/ipv6.h
include/linux/kasan.h
include/linux/kvm_host.h
include/linux/leds.h
include/linux/libata.h
include/linux/mfd/88pm860x.h
include/linux/mfd/abx500/ab8500.h
include/linux/mfd/dbx500-prcmu.h
include/linux/mfd/hi655x-pmic.h
include/linux/mfd/max77686-private.h
include/linux/mfd/rz-mtu3.h
include/linux/micrel_phy.h
include/linux/nvme-fc-driver.h
include/linux/of.h
include/linux/oid_registry.h
include/linux/phylink.h
include/linux/platform_data/rtc-ds2404.h [deleted file]
include/linux/pwm.h
include/linux/raid/pq.h
include/linux/remoteproc.h
include/linux/rmap.h
include/linux/rpmsg.h
include/linux/rtc.h
include/linux/tca6416_keypad.h
include/linux/thermal.h
include/linux/virtio.h
include/linux/xarray.h
include/net/ip.h
include/net/ip6_fib.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/scm.h
include/net/sock.h
include/scsi/scsi_host.h
include/sound/dmaengine_pcm.h
include/sound/soc-component.h
include/uapi/linux/elf.h
include/uapi/linux/fuse.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/rpmsg.h
include/uapi/scsi/scsi_bsg_ufs.h
io_uring/fdinfo.c
io_uring/io-wq.c
io_uring/io-wq.h
io_uring/io_uring.c
io_uring/sqpoll.c
kernel/auditsc.c
kernel/bpf/bpf_local_storage.c
kernel/bpf/core.c
kernel/bpf/syscall.c
kernel/bpf/trampoline.c
kernel/configs/debug.config
kernel/configs/kvm_guest.config
kernel/configs/nopm.config
kernel/configs/rust.config
kernel/configs/x86_debug.config
kernel/configs/xen.config
kernel/debug/kdb/kdb_io.c
kernel/dma/Kconfig
kernel/dma/contiguous.c
kernel/dma/debug.c
kernel/dma/pool.c
kernel/printk/internal.h
kernel/printk/printk.c
kernel/printk/printk_ringbuffer.c
kernel/printk/printk_safe.c
lib/Kconfig.debug
lib/Makefile
lib/idr.c
lib/iov_iter.c
lib/kunit_iov_iter.c [new file with mode: 0644]
lib/raid6/Makefile
lib/raid6/algos.c
lib/raid6/loongarch.h [new file with mode: 0644]
lib/raid6/loongarch_simd.c [new file with mode: 0644]
lib/raid6/recov_loongarch_simd.c [new file with mode: 0644]
lib/raid6/test/Makefile
lib/test_scanf.c
lib/xarray.c
mm/filemap.c
mm/kasan/init.c
mm/kasan/kasan.h
mm/kfence/core.c
mm/kmemleak.c
mm/ksm.c
mm/memcontrol.c
mm/memfd.c
mm/memory-failure.c
mm/page_alloc.c
mm/util.c
mm/vmalloc.c
net/bpf/test_run.c
net/can/j1939/socket.c
net/ceph/messenger.c
net/ceph/messenger_v1.c
net/ceph/messenger_v2.c
net/ceph/osd_client.c
net/core/flow_dissector.c
net/core/skbuff.c
net/core/skmsg.c
net/core/sock.c
net/core/sock_map.c
net/handshake/netlink.c
net/ipv4/fib_semantics.c
net/ipv4/fib_trie.c
net/ipv4/igmp.c
net/ipv4/ip_forward.c
net/ipv4/ip_input.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ipmr.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_output.c
net/ipv4/udp.c
net/ipv6/addrconf.c
net/ipv6/ip6_input.c
net/ipv6/ip6_output.c
net/ipv6/ip6mr.c
net/ipv6/ping.c
net/ipv6/raw.c
net/ipv6/route.c
net/ipv6/udp.c
net/kcm/kcmsock.c
net/mptcp/protocol.c
net/netfilter/ipset/ip_set_hash_netportnet.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_osf.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_set_rbtree.c
net/netfilter/xt_sctp.c
net/netfilter/xt_u32.c
net/sched/sch_fq_pie.c
net/sched/sch_plug.c
net/sched/sch_qfq.c
net/sctp/proc.c
net/sctp/socket.c
net/socket.c
net/unix/af_unix.c
net/unix/scm.c
net/xdp/xsk.c
net/xdp/xsk_diag.c
scripts/Makefile.extrawarn
scripts/Makefile.modinst
scripts/Makefile.modpost
scripts/Makefile.package
scripts/bpf_doc.py
scripts/depmod.sh
scripts/dummy-tools/gcc
scripts/kconfig/Makefile
scripts/kconfig/confdata.c
scripts/kconfig/expr.h
scripts/kconfig/lkc.h
scripts/kconfig/lxdialog/dialog.h
scripts/kconfig/lxdialog/textbox.c
scripts/kconfig/mconf.c
scripts/kconfig/menu.c
scripts/kconfig/nconf.c
scripts/kconfig/nconf.gui.c
scripts/kconfig/nconf.h
scripts/kconfig/preprocess.c
scripts/kconfig/qconf-cfg.sh
scripts/kconfig/qconf.cc
scripts/mod/modpost.c
scripts/mod/modpost.h
scripts/package/builddeb
scripts/package/debian/rules [new file with mode: 0755]
scripts/package/install-extmod-build [new file with mode: 0755]
scripts/package/kernel.spec [new file with mode: 0644]
scripts/package/mkdebian
scripts/package/mkspec
scripts/remove-stale-files
scripts/setlocalversion
security/landlock/ruleset.h
sound/core/pcm_lib.c
sound/core/seq/seq_memory.c
sound/isa/sb/emu8000_pcm.c
sound/pci/hda/patch_cs8409.c
sound/pci/hda/patch_cs8409.h
sound/pci/hda/patch_realtek.c
sound/pci/hda/tas2781_hda_i2c.c
sound/soc/amd/yc/acp6x-mach.c
sound/soc/atmel/mchp-pdmc.c
sound/soc/codecs/Kconfig
sound/soc/codecs/Makefile
sound/soc/codecs/cs35l45.c
sound/soc/codecs/cs35l56-shared.c
sound/soc/codecs/cs42l43.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/wcd-clsh-v2.c
sound/soc/intel/avs/pcm.c
sound/soc/soc-component.c
sound/soc/soc-generic-dmaengine-pcm.c
sound/soc/stm/stm32_sai_sub.c
sound/usb/midi2.c
tools/arch/x86/include/asm/cpufeatures.h
tools/bpf/bpftool/link.c
tools/build/Makefile.build
tools/build/feature/Makefile
tools/build/feature/test-clang.cpp [deleted file]
tools/build/feature/test-cxx.cpp [deleted file]
tools/build/feature/test-llvm-version.cpp [deleted file]
tools/build/feature/test-llvm.cpp [deleted file]
tools/lib/perf/include/perf/event.h
tools/mm/Makefile
tools/perf/Documentation/perf-bench.txt
tools/perf/Documentation/perf-config.txt
tools/perf/Documentation/perf-dlfilter.txt
tools/perf/Documentation/perf-ftrace.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf.data-file-format.txt
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/arch/arm/include/perf_regs.h
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/arm/util/perf_regs.c
tools/perf/arch/arm/util/unwind-libdw.c
tools/perf/arch/arm64/include/arch-tests.h
tools/perf/arch/arm64/include/perf_regs.h
tools/perf/arch/arm64/tests/Build
tools/perf/arch/arm64/tests/arch-tests.c
tools/perf/arch/arm64/tests/cpuid-match.c [new file with mode: 0644]
tools/perf/arch/arm64/util/arm-spe.c
tools/perf/arch/arm64/util/header.c
tools/perf/arch/arm64/util/machine.c
tools/perf/arch/arm64/util/mem-events.c
tools/perf/arch/arm64/util/perf_regs.c
tools/perf/arch/arm64/util/pmu.c
tools/perf/arch/arm64/util/unwind-libdw.c
tools/perf/arch/csky/include/perf_regs.h
tools/perf/arch/csky/util/perf_regs.c
tools/perf/arch/csky/util/unwind-libdw.c
tools/perf/arch/loongarch/include/perf_regs.h
tools/perf/arch/loongarch/util/perf_regs.c
tools/perf/arch/loongarch/util/unwind-libdw.c
tools/perf/arch/mips/include/perf_regs.h
tools/perf/arch/mips/util/perf_regs.c
tools/perf/arch/powerpc/include/perf_regs.h
tools/perf/arch/powerpc/util/mem-events.c
tools/perf/arch/powerpc/util/perf_regs.c
tools/perf/arch/powerpc/util/unwind-libdw.c
tools/perf/arch/riscv/include/perf_regs.h
tools/perf/arch/riscv/util/perf_regs.c
tools/perf/arch/riscv/util/unwind-libdw.c
tools/perf/arch/s390/include/perf_regs.h
tools/perf/arch/s390/util/perf_regs.c
tools/perf/arch/s390/util/unwind-libdw.c
tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
tools/perf/arch/x86/include/perf_regs.h
tools/perf/arch/x86/util/evlist.c
tools/perf/arch/x86/util/evsel.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/arch/x86/util/mem-events.c
tools/perf/arch/x86/util/perf_regs.c
tools/perf/arch/x86/util/pmu.c
tools/perf/arch/x86/util/unwind-libdw.c
tools/perf/bench/Build
tools/perf/bench/bench.h
tools/perf/bench/breakpoint.c
tools/perf/bench/pmu-scan.c
tools/perf/bench/uprobe.c [new file with mode: 0644]
tools/perf/builtin-bench.c
tools/perf/builtin-diff.c
tools/perf/builtin-list.c
tools/perf/builtin-lock.c
tools/perf/builtin-record.c
tools/perf/builtin-script.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/check-headers.sh
tools/perf/dlfilters/dlfilter-test-api-v0.c
tools/perf/dlfilters/dlfilter-test-api-v2.c [new file with mode: 0644]
tools/perf/examples/bpf/5sec.c [deleted file]
tools/perf/examples/bpf/empty.c [deleted file]
tools/perf/examples/bpf/hello.c [deleted file]
tools/perf/examples/bpf/sys_enter_openat.c [deleted file]
tools/perf/include/perf/perf_dlfilter.h
tools/perf/perf.c
tools/perf/pmu-events/Build
tools/perf/pmu-events/arch/arm64/ampere/ampereone/cache.json
tools/perf/pmu-events/arch/arm64/ampere/ampereone/core-imp-def.json
tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/ampere/ampereone/pipeline.json
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json [deleted file]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json [deleted file]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/fp_operation.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json [deleted file]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1i_cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json [deleted file]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/sve.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json
tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/sbsa.json
tools/perf/pmu-events/arch/powerpc/power10/cache.json
tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
tools/perf/pmu-events/arch/powerpc/power10/frontend.json
tools/perf/pmu-events/arch/powerpc/power10/marked.json
tools/perf/pmu-events/arch/powerpc/power10/memory.json
tools/perf/pmu-events/arch/powerpc/power10/metrics.json
tools/perf/pmu-events/arch/powerpc/power10/others.json
tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
tools/perf/pmu-events/arch/powerpc/power10/pmc.json
tools/perf/pmu-events/arch/powerpc/power10/translation.json
tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
tools/perf/pmu-events/arch/x86/mapfile.csv
tools/perf/pmu-events/arch/x86/meteorlake/cache.json
tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
tools/perf/pmu-events/arch/x86/meteorlake/memory.json
tools/perf/pmu-events/arch/x86/meteorlake/other.json
tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
tools/perf/pmu-events/arch/x86/sapphirerapids/other.json
tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
tools/perf/pmu-events/empty-pmu-events.c
tools/perf/pmu-events/jevents.py
tools/perf/pmu-events/metric.py
tools/perf/pmu-events/pmu-events.h
tools/perf/scripts/python/Perf-Trace-Util/Build
tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
tools/perf/scripts/python/bin/gecko-record [new file with mode: 0644]
tools/perf/scripts/python/bin/gecko-report [new file with mode: 0755]
tools/perf/scripts/python/gecko.py [new file with mode: 0644]
tools/perf/tests/.gitignore [deleted file]
tools/perf/tests/Build
tools/perf/tests/bpf-script-example.c [deleted file]
tools/perf/tests/bpf-script-test-kbuild.c [deleted file]
tools/perf/tests/bpf-script-test-prologue.c [deleted file]
tools/perf/tests/bpf-script-test-relocation.c [deleted file]
tools/perf/tests/bpf.c [deleted file]
tools/perf/tests/builtin-test.c
tools/perf/tests/clang.c [deleted file]
tools/perf/tests/config-fragments/README [new file with mode: 0644]
tools/perf/tests/config-fragments/arm64 [new file with mode: 0644]
tools/perf/tests/config-fragments/config [new file with mode: 0644]
tools/perf/tests/dlfilter-test.c
tools/perf/tests/expr.c
tools/perf/tests/llvm.c [deleted file]
tools/perf/tests/llvm.h [deleted file]
tools/perf/tests/make
tools/perf/tests/parse-events.c
tools/perf/tests/pmu-events.c
tools/perf/tests/pmu.c
tools/perf/tests/shell/coresight/asm_pure_loop.sh
tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh
tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh
tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh
tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh
tools/perf/tests/shell/lib/probe.sh
tools/perf/tests/shell/lib/probe_vfs_getname.sh
tools/perf/tests/shell/lib/stat_output.sh
tools/perf/tests/shell/lib/waiting.sh
tools/perf/tests/shell/lock_contention.sh
tools/perf/tests/shell/probe_vfs_getname.sh
tools/perf/tests/shell/record+zstd_comp_decomp.sh
tools/perf/tests/shell/record_bpf_filter.sh [new file with mode: 0755]
tools/perf/tests/shell/record_offcpu.sh
tools/perf/tests/shell/stat+csv_output.sh
tools/perf/tests/shell/stat+csv_summary.sh
tools/perf/tests/shell/stat+shadow_stat.sh
tools/perf/tests/shell/stat+std_output.sh
tools/perf/tests/shell/stat_bpf_counters.sh
tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
tools/perf/tests/shell/test_arm_spe_fork.sh
tools/perf/tests/shell/test_perf_data_converter_json.sh
tools/perf/tests/shell/test_task_analyzer.sh
tools/perf/tests/shell/trace+probe_vfs_getname.sh
tools/perf/tests/stat.c
tools/perf/tests/tests.h
tools/perf/trace/beauty/arch_errno_names.sh
tools/perf/trace/beauty/beauty.h
tools/perf/trace/beauty/mmap_flags.sh
tools/perf/trace/beauty/mmap_prot.sh
tools/perf/trace/beauty/x86_arch_prctl.sh
tools/perf/ui/Build
tools/perf/ui/browser.c
tools/perf/ui/browsers/Build
tools/perf/ui/browsers/hists.c
tools/perf/ui/libslang.h
tools/perf/ui/tui/helpline.c
tools/perf/ui/tui/setup.c
tools/perf/ui/tui/util.c
tools/perf/util/Build
tools/perf/util/amd-sample-raw.c
tools/perf/util/annotate.c
tools/perf/util/bpf-filter.c
tools/perf/util/bpf-filter.y
tools/perf/util/bpf-loader.c [deleted file]
tools/perf/util/bpf-loader.h [deleted file]
tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c [moved from tools/perf/examples/bpf/augmented_raw_syscalls.c with 91% similarity]
tools/perf/util/bpf_skel/bench_uprobe.bpf.c [new file with mode: 0644]
tools/perf/util/build-id.c
tools/perf/util/c++/Build [deleted file]
tools/perf/util/c++/clang-c.h [deleted file]
tools/perf/util/c++/clang-test.cpp [deleted file]
tools/perf/util/c++/clang.cpp [deleted file]
tools/perf/util/c++/clang.h [deleted file]
tools/perf/util/config.c
tools/perf/util/cs-etm.c
tools/perf/util/dlfilter.c
tools/perf/util/env.c
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evsel.c
tools/perf/util/expr.c
tools/perf/util/expr.h
tools/perf/util/expr.l
tools/perf/util/expr.y
tools/perf/util/header.c
tools/perf/util/libunwind/arm64.c
tools/perf/util/libunwind/x86_32.c
tools/perf/util/llvm-utils.c [deleted file]
tools/perf/util/llvm-utils.h [deleted file]
tools/perf/util/lzma.c
tools/perf/util/machine.c
tools/perf/util/mem-events.c
tools/perf/util/mem-events.h
tools/perf/util/metricgroup.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.l
tools/perf/util/parse-events.y
tools/perf/util/perf-regs-arch/Build [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_aarch64.c [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_arm.c [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_csky.c [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_loongarch.c [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_mips.c [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_powerpc.c [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_riscv.c [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_s390.c [new file with mode: 0644]
tools/perf/util/perf-regs-arch/perf_regs_x86.c [new file with mode: 0644]
tools/perf/util/perf_regs.c
tools/perf/util/perf_regs.h
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/pmu.y
tools/perf/util/pmus.c
tools/perf/util/pmus.h
tools/perf/util/print-events.h
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/python-ext-sources
tools/perf/util/python.c
tools/perf/util/s390-sample-raw.c
tools/perf/util/scripting-engines/Build
tools/perf/util/session.c
tools/perf/util/setup.py
tools/perf/util/stat-display.c
tools/perf/util/stat.c
tools/perf/util/svghelper.c
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.c
tools/perf/util/synthetic-events.c
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/unwind-libdw.c
tools/perf/util/unwind-libunwind-local.c
tools/perf/util/unwind.h
tools/power/cpupower/Makefile
tools/scripts/utilities.mak
tools/testing/radix-tree/multiorder.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
tools/testing/selftests/bpf/prog_tests/d_path.c
tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c [new file with mode: 0644]
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
tools/testing/selftests/kvm/aarch64/arch_timer.c
tools/testing/selftests/kvm/aarch64/debug-exceptions.c
tools/testing/selftests/kvm/aarch64/get-reg-list.c
tools/testing/selftests/kvm/aarch64/hypercalls.c
tools/testing/selftests/kvm/aarch64/page_fault_test.c
tools/testing/selftests/kvm/aarch64/vgic_irq.c
tools/testing/selftests/kvm/get-reg-list.c [new file with mode: 0644]
tools/testing/selftests/kvm/guest_print_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/include/aarch64/arch_timer.h
tools/testing/selftests/kvm/include/aarch64/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/kvm_util_base.h
tools/testing/selftests/kvm/include/riscv/processor.h
tools/testing/selftests/kvm/include/riscv/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/s390x/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/include/ucall_common.h
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/include/x86_64/ucall.h [new file with mode: 0644]
tools/testing/selftests/kvm/kvm_page_table_test.c
tools/testing/selftests/kvm/lib/aarch64/ucall.c
tools/testing/selftests/kvm/lib/guest_sprintf.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/riscv/ucall.c
tools/testing/selftests/kvm/lib/s390x/ucall.c
tools/testing/selftests/kvm/lib/sparsebit.c
tools/testing/selftests/kvm/lib/string_override.c
tools/testing/selftests/kvm/lib/test_util.c
tools/testing/selftests/kvm/lib/ucall_common.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/lib/x86_64/ucall.c
tools/testing/selftests/kvm/max_guest_memory_test.c
tools/testing/selftests/kvm/memslot_perf_test.c
tools/testing/selftests/kvm/riscv/get-reg-list.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390x/cmma_test.c
tools/testing/selftests/kvm/s390x/debug_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/s390x/memop.c
tools/testing/selftests/kvm/s390x/tprot.c
tools/testing/selftests/kvm/set_memory_region_test.c
tools/testing/selftests/kvm/steal_time.c
tools/testing/selftests/kvm/x86_64/cpuid_test.c
tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
tools/testing/selftests/kvm/x86_64/hyperv_features.c
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
tools/testing/selftests/kvm/x86_64/sync_regs_test.c
tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
tools/testing/selftests/kvm/x86_64/userspace_io_test.c
tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
tools/testing/selftests/kvm/x86_64/xapic_state_test.c
tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
tools/testing/selftests/landlock/fs_test.c
tools/testing/selftests/net/fib_tests.sh
virt/kvm/Kconfig
virt/kvm/kvm_main.c

index 9fd4c95..0bbae16 100644 (file)
@@ -74,7 +74,7 @@ modules.order
 #
 # RPM spec file (make rpm-pkg)
 #
-/*.spec
+/kernel.spec
 /rpmbuild/
 
 #
index 2e24ac3..b2ff001 100644 (file)
@@ -59,6 +59,15 @@ Description:
                brightness. Reading this file when no hw brightness change
                event has happened will return an ENODATA error.
 
+What:          /sys/class/leds/<led>/color
+Date:          June 2023
+KernelVersion: 6.5
+Description:
+               Color of the LED.
+
+               This is a read-only file. Reading this file returns the color
+               of the LED as a string (e.g: "red", "green", "multicolor").
+
 What:          /sys/class/leds/<led>/trigger
 Date:          March 2006
 KernelVersion: 2.6.17
index 11de998..7d84002 100644 (file)
@@ -88,6 +88,11 @@ data bandwidth::
     -e ali_drw_27080/hif_rmw/ \
     -e ali_drw_27080/cycle/ -- sleep 10
 
+Example usage of counting all memory read/write bandwidth by metric::
+
+  perf stat -M ddr_read_bandwidth.all -- sleep 10
+  perf stat -M ddr_write_bandwidth.all -- sleep 10
+
 The average DRAM bandwidth can be calculated as follows:
 
 - Read Bandwidth =  perf_hif_rd * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle
index 8019103..cf33de5 100644 (file)
@@ -450,6 +450,35 @@ this allows system administrators to override the
 ``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded.
 
 
+io_uring_disabled
+=================
+
+Prevents all processes from creating new io_uring instances. Enabling this
+shrinks the kernel's attack surface.
+
+= ======================================================================
+0 All processes can create io_uring instances as normal. This is the
+  default setting.
+1 io_uring creation is disabled (io_uring_setup() will fail with
+  -EPERM) for unprivileged processes not in the io_uring_group group.
+  Existing io_uring instances can still be used.  See the
+  documentation for io_uring_group for more information.
+2 io_uring creation is disabled for all processes. io_uring_setup()
+  always fails with -EPERM. Existing io_uring instances can still be
+  used.
+= ======================================================================
+
+
+io_uring_group
+==============
+
+When io_uring_disabled is set to 1, a process must either be
+privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order
+to create an io_uring instance.  If io_uring_group is set to -1 (the
+default), only processes with the CAP_SYS_ADMIN capability may create
+io_uring instances.
+
+
 kexec_load_disabled
 ===================
 
index f32db1f..e43c2fd 100644 (file)
@@ -726,8 +726,8 @@ same as the one describe in :ref:`BTF_Type_String`.
 4.2 .BTF.ext section
 --------------------
 
-The .BTF.ext section encodes func_info and line_info which needs loader
-manipulation before loading into the kernel.
+The .BTF.ext section encodes func_info, line_info and CO-RE relocations
+which need loader manipulation before loading into the kernel.
 
 The specification for .BTF.ext section is defined at ``tools/lib/bpf/btf.h``
 and ``tools/lib/bpf/btf.c``.
@@ -745,15 +745,20 @@ The current header of .BTF.ext section::
         __u32   func_info_len;
         __u32   line_info_off;
         __u32   line_info_len;
+
+        /* optional part of .BTF.ext header */
+        __u32   core_relo_off;
+        __u32   core_relo_len;
     };
 
 It is very similar to .BTF section. Instead of type/string section, it
-contains func_info and line_info section. See :ref:`BPF_Prog_Load` for details
-about func_info and line_info record format.
+contains func_info, line_info and core_relo sub-sections.
+See :ref:`BPF_Prog_Load` for details about func_info and line_info
+record format.
 
 The func_info is organized as below.::
 
-     func_info_rec_size
+     func_info_rec_size              /* __u32 value */
      btf_ext_info_sec for section #1 /* func_info for section #1 */
      btf_ext_info_sec for section #2 /* func_info for section #2 */
      ...
@@ -773,7 +778,7 @@ Here, num_info must be greater than 0.
 
 The line_info is organized as below.::
 
-     line_info_rec_size
+     line_info_rec_size              /* __u32 value */
      btf_ext_info_sec for section #1 /* line_info for section #1 */
      btf_ext_info_sec for section #2 /* line_info for section #2 */
      ...
@@ -787,6 +792,20 @@ kernel API, the ``insn_off`` is the instruction offset in the unit of ``struct
 bpf_insn``. For ELF API, the ``insn_off`` is the byte offset from the
 beginning of section (``btf_ext_info_sec->sec_name_off``).
 
+The core_relo is organized as below.::
+
+     core_relo_rec_size              /* __u32 value */
+     btf_ext_info_sec for section #1 /* core_relo for section #1 */
+     btf_ext_info_sec for section #2 /* core_relo for section #2 */
+
+``core_relo_rec_size`` specifies the size of ``bpf_core_relo``
+structure when .BTF.ext is generated. All ``bpf_core_relo`` structures
+within a single ``btf_ext_info_sec`` describe relocations applied to
+section named by ``btf_ext_info_sec->sec_name_off``.
+
+See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
+for more information on CO-RE relocations.
+
 4.2 .BTF_ids section
 --------------------
 
index 1ff177b..aeaeb35 100644 (file)
@@ -29,6 +29,7 @@ that goes into great technical depth about the BPF Architecture.
    bpf_licensing
    test_debug
    clang-notes
+   linux-notes
    other
    redirect
 
index 450e640..44188e2 100644 (file)
@@ -240,3 +240,307 @@ The .BTF/.BTF.ext sections has R_BPF_64_NODYLD32 relocations::
       Offset             Info             Type               Symbol's Value  Symbol's Name
   000000000000002c  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
   0000000000000040  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
+
+.. _btf-co-re-relocations:
+
+=================
+CO-RE Relocations
+=================
+
+From the object file point of view, the CO-RE mechanism is implemented as
+a set of CO-RE specific relocation records. These relocation records are
+not related to ELF relocations and are encoded in the .BTF.ext section.
+See :ref:`Documentation/bpf/btf.rst <BTF_Ext_Section>` for more
+information on .BTF.ext structure.
+
+CO-RE relocations are applied to BPF instructions to update immediate
+or offset fields of the instruction at load time with information
+relevant for target kernel.
+
+The field to patch is selected based on the instruction class:
+
+* For BPF_ALU, BPF_ALU64, BPF_LD `immediate` field is patched;
+* For BPF_LDX, BPF_STX, BPF_ST `offset` field is patched;
+* BPF_JMP, BPF_JMP32 instructions **should not** be patched.
+
+Relocation kinds
+================
+
+There are several kinds of CO-RE relocations that could be split into
+three groups:
+
+* Field-based - patch instruction with field related information, e.g.
+  change offset field of the BPF_LDX instruction to reflect offset
+  of a specific structure field in the target kernel.
+
+* Type-based - patch instruction with type related information, e.g.
+  change immediate field of the BPF_ALU move instruction to 0 or 1 to
+  reflect if specific type is present in the target kernel.
+
+* Enum-based - patch instruction with enum related information, e.g.
+  change immediate field of the BPF_LD_IMM64 instruction to reflect
+  value of a specific enum literal in the target kernel.
+
+The complete list of relocation kinds is represented by the following enum:
+
+.. code-block:: c
+
+ enum bpf_core_relo_kind {
+       BPF_CORE_FIELD_BYTE_OFFSET = 0,  /* field byte offset */
+       BPF_CORE_FIELD_BYTE_SIZE   = 1,  /* field size in bytes */
+       BPF_CORE_FIELD_EXISTS      = 2,  /* field existence in target kernel */
+       BPF_CORE_FIELD_SIGNED      = 3,  /* field signedness (0 - unsigned, 1 - signed) */
+       BPF_CORE_FIELD_LSHIFT_U64  = 4,  /* bitfield-specific left bitshift */
+       BPF_CORE_FIELD_RSHIFT_U64  = 5,  /* bitfield-specific right bitshift */
+       BPF_CORE_TYPE_ID_LOCAL     = 6,  /* type ID in local BPF object */
+       BPF_CORE_TYPE_ID_TARGET    = 7,  /* type ID in target kernel */
+       BPF_CORE_TYPE_EXISTS       = 8,  /* type existence in target kernel */
+       BPF_CORE_TYPE_SIZE         = 9,  /* type size in bytes */
+       BPF_CORE_ENUMVAL_EXISTS    = 10, /* enum value existence in target kernel */
+       BPF_CORE_ENUMVAL_VALUE     = 11, /* enum value integer value */
+       BPF_CORE_TYPE_MATCHES      = 12, /* type match in target kernel */
+ };
+
+Notes:
+
+* ``BPF_CORE_FIELD_LSHIFT_U64`` and ``BPF_CORE_FIELD_RSHIFT_U64`` are
+  supposed to be used to read bitfield values using the following
+  algorithm:
+
+  .. code-block:: c
+
+     // To read bitfield ``f`` from ``struct s``
+     is_signed = relo(s->f, BPF_CORE_FIELD_SIGNED)
+     off = relo(s->f, BPF_CORE_FIELD_BYTE_OFFSET)
+     sz  = relo(s->f, BPF_CORE_FIELD_BYTE_SIZE)
+     l   = relo(s->f, BPF_CORE_FIELD_LSHIFT_U64)
+     r   = relo(s->f, BPF_CORE_FIELD_RSHIFT_U64)
+     // define ``v`` as signed or unsigned integer of size ``sz``
+     v = *({s|u}<sz> *)((void *)s + off)
+     v <<= l
+     v >>= r
+
+* The ``BPF_CORE_TYPE_MATCHES`` queries matching relation, defined as
+  follows:
+
+  * for integers: types match if size and signedness match;
+  * for arrays & pointers: target types are recursively matched;
+  * for structs & unions:
+
+    * local members need to exist in target with the same name;
+
+    * for each member we recursively check match unless it is already behind a
+      pointer, in which case we only check matching names and compatible kind;
+
+  * for enums:
+
+    * local variants have to have a match in target by symbolic name (but not
+      numeric value);
+
+    * size has to match (but enum may match enum64 and vice versa);
+
+  * for function pointers:
+
+    * number and position of arguments in local type has to match target;
+    * for each argument and the return value we recursively check match.
+
+CO-RE Relocation Record
+=======================
+
+Relocation record is encoded as the following structure:
+
+.. code-block:: c
+
+ struct bpf_core_relo {
+       __u32 insn_off;
+       __u32 type_id;
+       __u32 access_str_off;
+       enum bpf_core_relo_kind kind;
+ };
+
+* ``insn_off`` - instruction offset (in bytes) within a code section
+  associated with this relocation;
+
+* ``type_id`` - BTF type ID of the "root" (containing) entity of a
+  relocatable type or field;
+
+* ``access_str_off`` - offset into corresponding .BTF string section.
+  String interpretation depends on specific relocation kind:
+
+  * for field-based relocations, string encodes an accessed field using
+    a sequence of field and array indices, separated by colon (:). It's
+    conceptually very close to LLVM's `getelementptr <GEP_>`_ instruction's
+    arguments for identifying offset to a field. For example, consider the
+    following C code:
+
+    .. code-block:: c
+
+       struct sample {
+           int a;
+           int b;
+           struct { int c[10]; };
+       } __attribute__((preserve_access_index));
+       struct sample *s;
+
+    * Access to ``s[0].a`` would be encoded as ``0:0``:
+
+      * ``0``: first element of ``s`` (as if ``s`` is an array);
+      * ``0``: index of field ``a`` in ``struct sample``.
+
+    * Access to ``s->a`` would be encoded as ``0:0`` as well.
+    * Access to ``s->b`` would be encoded as ``0:1``:
+
+      * ``0``: first element of ``s``;
+      * ``1``: index of field ``b`` in ``struct sample``.
+
+    * Access to ``s[1].c[5]`` would be encoded as ``1:2:0:5``:
+
+      * ``1``: second element of ``s``;
+      * ``2``: index of anonymous structure field in ``struct sample``;
+      * ``0``: index of field ``c`` in anonymous structure;
+      * ``5``: access to array element #5.
+
+  * for type-based relocations, string is expected to be just "0";
+
+  * for enum value-based relocations, string contains an index of enum
+    value within its enum type;
+
+* ``kind`` - one of ``enum bpf_core_relo_kind``.
+
+.. _GEP: https://llvm.org/docs/LangRef.html#getelementptr-instruction
+
+.. _btf_co_re_relocation_examples:
+
+CO-RE Relocation Examples
+=========================
+
+For the following C code:
+
+.. code-block:: c
+
+ struct foo {
+   int a;
+   int b;
+   unsigned c:15;
+ } __attribute__((preserve_access_index));
+
+ enum bar { U, V };
+
+With the following BTF definitions:
+
+.. code-block::
+
+ ...
+ [2] STRUCT 'foo' size=12 vlen=3
+        'a' type_id=3 bits_offset=0
+        'b' type_id=3 bits_offset=32
+        'c' type_id=4 bits_offset=64 bitfield_size=15
+ [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [4] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+ ...
+ [16] ENUM 'bar' encoding=UNSIGNED size=4 vlen=2
+        'U' val=0
+        'V' val=1
+
+Field offset relocations are generated automatically when
+``__attribute__((preserve_access_index))`` is used, for example:
+
+.. code-block:: c
+
+  void alpha(struct foo *s, volatile unsigned long *g) {
+    *g = s->a;
+    s->a = 1;
+  }
+
+  00 <alpha>:
+    0:  r3 = *(s32 *)(r1 + 0x0)
+           00:  CO-RE <byte_off> [2] struct foo::a (0:0)
+    1:  *(u64 *)(r2 + 0x0) = r3
+    2:  *(u32 *)(r1 + 0x0) = 0x1
+           10:  CO-RE <byte_off> [2] struct foo::a (0:0)
+    3:  exit
+
+
+All relocation kinds could be requested via built-in functions.
+E.g. field-based relocations:
+
+.. code-block:: c
+
+  void bravo(struct foo *s, volatile unsigned long *g) {
+    *g = __builtin_preserve_field_info(s->b, 0 /* field byte offset */);
+    *g = __builtin_preserve_field_info(s->b, 1 /* field byte size */);
+    *g = __builtin_preserve_field_info(s->b, 2 /* field existence */);
+    *g = __builtin_preserve_field_info(s->b, 3 /* field signedness */);
+    *g = __builtin_preserve_field_info(s->c, 4 /* bitfield left shift */);
+    *g = __builtin_preserve_field_info(s->c, 5 /* bitfield right shift */);
+  }
+
+  20 <bravo>:
+     4:     r1 = 0x4
+            20:  CO-RE <byte_off> [2] struct foo::b (0:1)
+     5:     *(u64 *)(r2 + 0x0) = r1
+     6:     r1 = 0x4
+            30:  CO-RE <byte_sz> [2] struct foo::b (0:1)
+     7:     *(u64 *)(r2 + 0x0) = r1
+     8:     r1 = 0x1
+            40:  CO-RE <field_exists> [2] struct foo::b (0:1)
+     9:     *(u64 *)(r2 + 0x0) = r1
+    10:     r1 = 0x1
+            50:  CO-RE <signed> [2] struct foo::b (0:1)
+    11:     *(u64 *)(r2 + 0x0) = r1
+    12:     r1 = 0x31
+            60:  CO-RE <lshift_u64> [2] struct foo::c (0:2)
+    13:     *(u64 *)(r2 + 0x0) = r1
+    14:     r1 = 0x31
+            70:  CO-RE <rshift_u64> [2] struct foo::c (0:2)
+    15:     *(u64 *)(r2 + 0x0) = r1
+    16:     exit
+
+
+Type-based relocations:
+
+.. code-block:: c
+
+  void charlie(struct foo *s, volatile unsigned long *g) {
+    *g = __builtin_preserve_type_info(*s, 0 /* type existence */);
+    *g = __builtin_preserve_type_info(*s, 1 /* type size */);
+    *g = __builtin_preserve_type_info(*s, 2 /* type matches */);
+    *g = __builtin_btf_type_id(*s, 0 /* type id in this object file */);
+    *g = __builtin_btf_type_id(*s, 1 /* type id in target kernel */);
+  }
+
+  88 <charlie>:
+    17:     r1 = 0x1
+            88:  CO-RE <type_exists> [2] struct foo
+    18:     *(u64 *)(r2 + 0x0) = r1
+    19:     r1 = 0xc
+            98:  CO-RE <type_size> [2] struct foo
+    20:     *(u64 *)(r2 + 0x0) = r1
+    21:     r1 = 0x1
+            a8:  CO-RE <type_matches> [2] struct foo
+    22:     *(u64 *)(r2 + 0x0) = r1
+    23:     r1 = 0x2 ll
+            b8:  CO-RE <local_type_id> [2] struct foo
+    25:     *(u64 *)(r2 + 0x0) = r1
+    26:     r1 = 0x2 ll
+            d0:  CO-RE <target_type_id> [2] struct foo
+    28:     *(u64 *)(r2 + 0x0) = r1
+    29:     exit
+
+Enum-based relocations:
+
+.. code-block:: c
+
+  void delta(struct foo *s, volatile unsigned long *g) {
+    *g = __builtin_preserve_enum_value(*(enum bar *)U, 0 /* enum literal existence */);
+    *g = __builtin_preserve_enum_value(*(enum bar *)V, 1 /* enum literal value */);
+  }
+
+  f0 <delta>:
+    30:     r1 = 0x1 ll
+            f0:  CO-RE <enumval_exists> [16] enum bar::U = 0
+    32:     *(u64 *)(r2 + 0x0) = r1
+    33:     r1 = 0x1 ll
+            108:  CO-RE <enumval_value> [16] enum bar::V = 1
+    35:     *(u64 *)(r2 + 0x0) = r1
+    36:     exit
diff --git a/Documentation/bpf/standardization/abi.rst b/Documentation/bpf/standardization/abi.rst
new file mode 100644 (file)
index 0000000..0c2e10e
--- /dev/null
@@ -0,0 +1,25 @@
+.. contents::
+.. sectnum::
+
+===================================================
+BPF ABI Recommended Conventions and Guidelines v1.0
+===================================================
+
+This is version 1.0 of an informational document containing recommended
+conventions and guidelines for producing portable BPF program binaries.
+
+Registers and calling convention
+================================
+
+BPF has 10 general purpose registers and a read-only frame pointer register,
+all of which are 64-bits wide.
+
+The BPF calling convention is defined as:
+
+* R0: return value from function calls, and exit value for BPF programs
+* R1 - R5: arguments for function calls
+* R6 - R9: callee saved registers that function calls will preserve
+* R10: read-only frame pointer to access stack
+
+R0 - R5 are scratch registers and BPF programs need to spill/fill them if
+necessary across calls.
index 09c6ba0..a50c3ba 100644 (file)
@@ -12,7 +12,7 @@ for the working group charter, documents, and more.
    :maxdepth: 1
 
    instruction-set
-   linux-notes
+   abi
 
 .. Links:
 .. _IETF BPF Working Group: https://datatracker.ietf.org/wg/bpf/about/
index 4f73e9d..c5d53a6 100644 (file)
@@ -1,11 +1,11 @@
 .. contents::
 .. sectnum::
 
-========================================
-eBPF Instruction Set Specification, v1.0
-========================================
+=======================================
+BPF Instruction Set Specification, v1.0
+=======================================
 
-This document specifies version 1.0 of the eBPF instruction set.
+This document specifies version 1.0 of the BPF instruction set.
 
 Documentation conventions
 =========================
@@ -97,26 +97,10 @@ Definitions
     A:          10000110
     B: 11111111 10000110
 
-Registers and calling convention
-================================
-
-eBPF has 10 general purpose registers and a read-only frame pointer register,
-all of which are 64-bits wide.
-
-The eBPF calling convention is defined as:
-
-* R0: return value from function calls, and exit value for eBPF programs
-* R1 - R5: arguments for function calls
-* R6 - R9: callee saved registers that function calls will preserve
-* R10: read-only frame pointer to access stack
-
-R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
-necessary across calls.
-
 Instruction encoding
 ====================
 
-eBPF has two instruction encodings:
+BPF has two instruction encodings:
 
 * the basic instruction encoding, which uses 64 bits to encode an instruction
 * the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
@@ -260,7 +244,7 @@ BPF_END    0xd0   0        byte swap operations (see `Byte swap instructions`_ b
 =========  =====  =======  ==========================================================
 
 Underflow and overflow are allowed during arithmetic operations, meaning
-the 64-bit or 32-bit value will wrap. If eBPF program execution would
+the 64-bit or 32-bit value will wrap. If BPF program execution would
 result in division by zero, the destination register is instead set to zero.
 If execution would result in modulo by zero, for ``BPF_ALU64`` the value of
 the destination register is unchanged whereas for ``BPF_ALU`` the upper
@@ -373,7 +357,7 @@ BPF_JNE   0x5    any  PC += offset if dst != src
 BPF_JSGT  0x6    any  PC += offset if dst > src                    signed
 BPF_JSGE  0x7    any  PC += offset if dst >= src                   signed
 BPF_CALL  0x8    0x0  call helper function by address              see `Helper functions`_
-BPF_CALL  0x8    0x1  call PC += offset                            see `Program-local functions`_
+BPF_CALL  0x8    0x1  call PC += imm                               see `Program-local functions`_
 BPF_CALL  0x8    0x2  call helper function by BTF ID               see `Helper functions`_
 BPF_EXIT  0x9    0x0  return                                       BPF_JMP only
 BPF_JLT   0xa    any  PC += offset if dst < src                    unsigned
@@ -382,7 +366,7 @@ BPF_JSLT  0xc    any  PC += offset if dst < src                    signed
 BPF_JSLE  0xd    any  PC += offset if dst <= src                   signed
 ========  =====  ===  ===========================================  =========================================
 
-The eBPF program needs to store the return value into register R0 before doing a
+The BPF program needs to store the return value into register R0 before doing a
 ``BPF_EXIT``.
 
 Example:
@@ -424,8 +408,8 @@ Program-local functions
 ~~~~~~~~~~~~~~~~~~~~~~~
 Program-local functions are functions exposed by the same BPF program as the
 caller, and are referenced by offset from the call instruction, similar to
-``BPF_JA``.  A ``BPF_EXIT`` within the program-local function will return to
-the caller.
+``BPF_JA``.  The offset is encoded in the imm field of the call instruction.
+A ``BPF_EXIT`` within the program-local function will return to the caller.
 
 Load and store instructions
 ===========================
@@ -502,9 +486,9 @@ Atomic operations
 
 Atomic operations are operations that operate on memory and can not be
 interrupted or corrupted by other access to the same memory region
-by other eBPF programs or means outside of this specification.
+by other BPF programs or means outside of this specification.
 
-All atomic operations supported by eBPF are encoded as store operations
+All atomic operations supported by BPF are encoded as store operations
 that use the ``BPF_ATOMIC`` mode modifier as follows:
 
 * ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
@@ -594,7 +578,7 @@ where
 Maps
 ~~~~
 
-Maps are shared memory regions accessible by eBPF programs on some platforms.
+Maps are shared memory regions accessible by BPF programs on some platforms.
 A map can have various semantics as defined in a separate document, and may or
 may not have a single contiguous memory region, but the 'map_val(map)' is
 currently only defined for maps that do have a single contiguous memory region.
@@ -616,6 +600,6 @@ identified by the given id.
 Legacy BPF Packet access instructions
 -------------------------------------
 
-eBPF previously introduced special instructions for access to packet data that were
+BPF previously introduced special instructions for access to packet data that were
 carried over from classic BPF. However, these instructions are
 deprecated and should no longer be used.
index dfe7e75..4451ef5 100644 (file)
@@ -15,9 +15,10 @@ Integer types
 
        If variable is of Type,         use printk format specifier:
        ------------------------------------------------------------
-               char                    %d or %x
+               signed char             %d or %hhx
                unsigned char           %u or %x
-               short int               %d or %x
+               char                    %u or %x
+               short int               %d or %hx
                unsigned short int      %u or %x
                int                     %d or %x
                unsigned int            %u or %x
@@ -27,9 +28,9 @@ Integer types
                unsigned long long      %llu or %llx
                size_t                  %zu or %zx
                ssize_t                 %zd or %zx
-               s8                      %d or %x
+               s8                      %d or %hhx
                u8                      %u or %x
-               s16                     %d or %x
+               s16                     %d or %hx
                u16                     %u or %x
                s32                     %d or %x
                u32                     %u or %x
index f4acf9c..382818a 100644 (file)
@@ -41,8 +41,8 @@ Support
 Architectures
 ~~~~~~~~~~~~~
 
-Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
-xtensa, and the tag-based KASAN modes are supported only on arm64.
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa,
+and loongarch, and the tag-based KASAN modes are supported only on arm64.
 
 Compilers
 ~~~~~~~~~
diff --git a/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml b/Documentation/devicetree/bindings/cache/andestech,ax45mp-cache.yaml
new file mode 100644 (file)
index 0000000..9ab5f0c
--- /dev/null
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright (C) 2023 Renesas Electronics Corp.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/cache/andestech,ax45mp-cache.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Andestech AX45MP L2 Cache Controller
+
+maintainers:
+  - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
+
+description:
+  A level-2 cache (L2C) is used to improve the system performance by providing
+  a large amount of cache line entries and reasonable access delays. The L2C
+  is shared between cores, and a non-inclusive non-exclusive policy is used.
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - andestech,ax45mp-cache
+
+  required:
+    - compatible
+
+properties:
+  compatible:
+    items:
+      - const: andestech,ax45mp-cache
+      - const: cache
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  cache-line-size:
+    const: 64
+
+  cache-level:
+    const: 2
+
+  cache-sets:
+    const: 1024
+
+  cache-size:
+    enum: [131072, 262144, 524288, 1048576, 2097152]
+
+  cache-unified: true
+
+  next-level-cache: true
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - cache-line-size
+  - cache-level
+  - cache-sets
+  - cache-size
+  - cache-unified
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    cache-controller@13400000 {
+        compatible = "andestech,ax45mp-cache", "cache";
+        reg = <0x13400000 0x100000>;
+        interrupts = <508 IRQ_TYPE_LEVEL_HIGH>;
+        cache-line-size = <64>;
+        cache-level = <2>;
+        cache-sets = <1024>;
+        cache-size = <262144>;
+        cache-unified;
+    };
index ff57c54..9f1d35c 100644 (file)
@@ -48,6 +48,9 @@ properties:
     default: 16
     enum: [2, 4, 8, 16, 32, 64, 128, 256]
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt b/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt
deleted file mode 100644 (file)
index 548a73c..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-GPIO-based I2C Arbitration Using a Challenge & Response Mechanism
-=================================================================
-This uses GPIO lines and a challenge & response mechanism to arbitrate who is
-the master of an I2C bus in a multimaster situation.
-
-In many cases using GPIOs to arbitrate is not needed and a design can use
-the standard I2C multi-master rules.  Using GPIOs is generally useful in
-the case where there is a device on the bus that has errata and/or bugs
-that makes standard multimaster mode not feasible.
-
-Note that this scheme works well enough but has some downsides:
-* It is nonstandard (not using standard I2C multimaster)
-* Having two masters on a bus in general makes it relatively hard to debug
-  problems (hard to tell if i2c issues were caused by one master, another, or
-  some device on the bus).
-
-
-Algorithm:
-
-All masters on the bus have a 'bus claim' line which is an output that the
-others can see. These are all active low with pull-ups enabled.  We'll
-describe these lines as:
-
-- OUR_CLAIM: output from us signaling to other hosts that we want the bus
-- THEIR_CLAIMS: output from others signaling that they want the bus
-
-The basic algorithm is to assert your line when you want the bus, then make
-sure that the other side doesn't want it also.  A detailed explanation is best
-done with an example.
-
-Let's say we want to claim the bus.  We:
-1. Assert OUR_CLAIM.
-2. Waits a little bit for the other sides to notice (slew time, say 10
-   microseconds).
-3. Check THEIR_CLAIMS.  If none are asserted then the we have the bus and we are
-   done.
-4. Otherwise, wait for a few milliseconds and see if THEIR_CLAIMS are released.
-5. If not, back off, release the claim and wait for a few more milliseconds.
-6. Go back to 1 (until retry time has expired).
-
-
-Required properties:
-- compatible: i2c-arb-gpio-challenge
-- our-claim-gpio: The GPIO that we use to claim the bus.
-- their-claim-gpios: The GPIOs that the other sides use to claim the bus.
-  Note that some implementations may only support a single other master.
-- I2C arbitration bus node. See i2c-arb.txt in this directory.
-
-Optional properties:
-- slew-delay-us: microseconds to wait for a GPIO to go high. Default is 10 us.
-- wait-retry-us: we'll attempt another claim after this many microseconds.
-    Default is 3000 us.
-- wait-free-us: we'll give up after this many microseconds. Default is 50000 us.
-
-
-Example:
-       i2c@12ca0000 {
-               compatible = "acme,some-i2c-device";
-               #address-cells = <1>;
-               #size-cells = <0>;
-       };
-
-       i2c-arbitrator {
-               compatible = "i2c-arb-gpio-challenge";
-
-               i2c-parent = <&{/i2c@12CA0000}>;
-
-               our-claim-gpio = <&gpf0 3 1>;
-               their-claim-gpios = <&gpe0 4 1>;
-               slew-delay-us = <10>;
-               wait-retry-us = <3000>;
-               wait-free-us = <50000>;
-
-               i2c-arb {
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-
-                       i2c@52 {
-                               // Normal I2C device
-                       };
-               };
-       };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.yaml b/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.yaml
new file mode 100644 (file)
index 0000000..b618b5a
--- /dev/null
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/i2c-arb-gpio-challenge.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GPIO-based I2C Arbitration Using a Challenge & Response Mechanism
+
+maintainers:
+  - Doug Anderson <dianders@chromium.org>
+  - Peter Rosin <peda@axentia.se>
+
+description: |
+  This uses GPIO lines and a challenge & response mechanism to arbitrate who is
+  the master of an I2C bus in a multimaster situation.
+
+  In many cases using GPIOs to arbitrate is not needed and a design can use the
+  standard I2C multi-master rules.  Using GPIOs is generally useful in the case
+  where there is a device on the bus that has errata and/or bugs that makes
+  standard multimaster mode not feasible.
+
+  Note that this scheme works well enough but has some downsides:
+   * It is nonstandard (not using standard I2C multimaster)
+   * Having two masters on a bus in general makes it relatively hard to debug
+     problems (hard to tell if i2c issues were caused by one master, another,
+     or some device on the bus).
+
+  Algorithm:
+  All masters on the bus have a 'bus claim' line which is an output that the
+  others can see. These are all active low with pull-ups enabled.  We'll
+  describe these lines as:
+   * OUR_CLAIM: output from us signaling to other hosts that we want the bus
+   * THEIR_CLAIMS: output from others signaling that they want the bus
+
+  The basic algorithm is to assert your line when you want the bus, then make
+  sure that the other side doesn't want it also.  A detailed explanation is
+  best done with an example.
+
+  Let's say we want to claim the bus.  We:
+  1. Assert OUR_CLAIM.
+  2. Wait a little bit for the other sides to notice (slew time, say 10
+     microseconds).
+  3. Check THEIR_CLAIMS.  If none are asserted then we have the bus and we
+     are done.
+  4. Otherwise, wait for a few milliseconds and see if THEIR_CLAIMS are released.
+  5. If not, back off, release the claim and wait for a few more milliseconds.
+  6. Go back to 1 (until retry time has expired).
+
+properties:
+  compatible:
+    const: i2c-arb-gpio-challenge
+
+  i2c-parent:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      The I2C bus that this multiplexer's master-side port is connected to.
+
+  our-claim-gpios:
+    maxItems: 1
+    description:
+      The GPIO that we use to claim the bus.
+
+  slew-delay-us:
+    default: 10
+    description:
+      Time to wait for a GPIO to go high.
+
+  their-claim-gpios:
+    minItems: 1
+    maxItems: 8
+    description:
+      The GPIOs that the other sides use to claim the bus.  Note that some
+      implementations may only support a single other master.
+
+  wait-free-us:
+    default: 50000
+    description:
+      We'll give up after this many microseconds.
+
+  wait-retry-us:
+    default: 3000
+    description:
+      We'll attempt another claim after this many microseconds.
+
+  i2c-arb:
+    type: object
+    $ref: /schemas/i2c/i2c-controller.yaml
+    unevaluatedProperties: false
+    description:
+      I2C arbitration bus node.
+
+required:
+  - compatible
+  - i2c-arb
+  - our-claim-gpios
+  - their-claim-gpios
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c-arbitrator {
+        compatible = "i2c-arb-gpio-challenge";
+        i2c-parent = <&i2c_4>;
+
+        our-claim-gpios = <&gpf0 3 GPIO_ACTIVE_LOW>;
+        their-claim-gpios = <&gpe0 4 GPIO_ACTIVE_LOW>;
+        slew-delay-us = <10>;
+        wait-retry-us = <3000>;
+        wait-free-us = <50000>;
+
+        i2c-arb {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            sbs-battery@b {
+                compatible = "sbs,sbs-battery";
+                reg = <0xb>;
+                sbs,poll-retry-count = <1>;
+            };
+
+            embedded-controller@1e {
+                compatible = "google,cros-ec-i2c";
+                reg = <0x1e>;
+                interrupts = <6 IRQ_TYPE_LEVEL_HIGH>;
+                interrupt-parent = <&gpx1>;
+                pinctrl-names = "default";
+                pinctrl-0 = <&ec_irq>;
+                wakeup-source;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-arb.txt b/Documentation/devicetree/bindings/i2c/i2c-arb.txt
deleted file mode 100644 (file)
index 59abf92..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-Common i2c arbitration bus properties.
-
-- i2c-arb child node
-
-Required properties for the i2c-arb child node:
-- #address-cells = <1>;
-- #size-cells = <0>;
-
-Optional properties for i2c-arb child node:
-- Child nodes conforming to i2c bus binding
-
-
-Example :
-
-       /*
-          An NXP pca9541 I2C bus master selector at address 0x74
-          with a NXP pca8574 GPIO expander attached.
-        */
-
-       arb@74 {
-               compatible = "nxp,pca9541";
-               reg = <0x74>;
-
-               i2c-arb {
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-
-                       gpio@38 {
-                               compatible = "nxp,pca8574";
-                               reg = <0x38>;
-                               #gpio-cells = <2>;
-                               gpio-controller;
-                       };
-               };
-       };
index 9f1726d..2d7bb99 100644 (file)
@@ -4,21 +4,29 @@
 $id: http://devicetree.org/schemas/i2c/i2c-mux-pca954x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP PCA954x I2C bus switch
+title: NXP PCA954x I2C and compatible bus switches
 
 maintainers:
   - Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 
 description:
-  The binding supports NXP PCA954x and PCA984x I2C mux/switch devices.
-
-allOf:
-  - $ref: /schemas/i2c/i2c-mux.yaml#
+  The NXP PCA954x and compatible devices are I2C bus
+  multiplexer/switches that share the same functionality
+  and register layout.
+  The devices usually have 4 or 8 child buses, which are
+  attached to the parent bus by using the SMBus "Send Byte"
+  command.
 
 properties:
   compatible:
     oneOf:
       - enum:
+          - maxim,max7356
+          - maxim,max7357
+          - maxim,max7358
+          - maxim,max7367
+          - maxim,max7368
+          - maxim,max7369
           - nxp,pca9540
           - nxp,pca9542
           - nxp,pca9543
@@ -59,10 +67,34 @@ properties:
     description: if present, overrides i2c-mux-idle-disconnect
     $ref: /schemas/mux/mux-controller.yaml#/properties/idle-state
 
+  vdd-supply:
+    description: A voltage regulator supplying power to the chip. On PCA9846
+      the regulator supplies power to VDD2 (core logic) and optionally to VDD1.
+
 required:
   - compatible
   - reg
 
+allOf:
+  - $ref: /schemas/i2c/i2c-mux.yaml#
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - maxim,max7367
+                - maxim,max7369
+                - nxp,pca9542
+                - nxp,pca9543
+                - nxp,pca9544
+                - nxp,pca9545
+    then:
+      properties:
+        interrupts: false
+        "#interrupt-cells": false
+        interrupt-controller: false
+
 unevaluatedProperties: false
 
 examples:
@@ -74,11 +106,13 @@ examples:
         #size-cells = <0>;
 
         i2c-mux@74 {
-            compatible = "nxp,pca9548";
+            compatible = "nxp,pca9545";
             #address-cells = <1>;
             #size-cells = <0>;
             reg = <0x74>;
 
+            vdd-supply = <&p3v3>;
+
             interrupt-parent = <&ipic>;
             interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
             interrupt-controller;
diff --git a/Documentation/devicetree/bindings/i2c/nxp,pca9541.txt b/Documentation/devicetree/bindings/i2c/nxp,pca9541.txt
deleted file mode 100644 (file)
index 42bfc09..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-* NXP PCA9541 I2C bus master selector
-
-Required Properties:
-
-  - compatible: Must be "nxp,pca9541"
-
-  - reg: The I2C address of the device.
-
-  The following required properties are defined externally:
-
-  - I2C arbitration bus node. See i2c-arb.txt in this directory.
-
-
-Example:
-
-       i2c-arbitrator@74 {
-               compatible = "nxp,pca9541";
-               reg = <0x74>;
-
-               i2c-arb {
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-
-                       eeprom@54 {
-                               compatible = "atmel,24c08";
-                               reg = <0x54>;
-                       };
-               };
-       };
diff --git a/Documentation/devicetree/bindings/i2c/nxp,pca9541.yaml b/Documentation/devicetree/bindings/i2c/nxp,pca9541.yaml
new file mode 100644 (file)
index 0000000..b65c25c
--- /dev/null
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/nxp,pca9541.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP PCA9541 I2C bus master selector
+
+maintainers:
+  - Peter Rosin <peda@axentia.se>
+
+properties:
+  compatible:
+    const: nxp,pca9541
+
+  reg:
+    maxItems: 1
+
+  i2c-arb:
+    type: object
+    $ref: /schemas/i2c/i2c-controller.yaml
+    unevaluatedProperties: false
+    description:
+      I2C arbitration bus node.
+
+required:
+  - compatible
+  - reg
+  - i2c-arb
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        i2c-arbitrator@74 {
+            compatible = "nxp,pca9541";
+            reg = <0x74>;
+
+            i2c-arb {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                eeprom@54 {
+                    compatible = "atmel,24c08";
+                    reg = <0x54>;
+                };
+            };
+        };
+    };
index ec79b72..042d4dc 100644 (file)
@@ -269,6 +269,7 @@ examples:
                 port {
                     ov7251_ep: endpoint {
                         data-lanes = <0 1>;
+                        link-frequencies = /bits/ 64 <240000000 319200000>;
                         remote-endpoint = <&csiphy3_ep>;
                     };
                 };
index fdb4212..ab69f41 100644 (file)
@@ -135,9 +135,10 @@ patternProperties:
         minimum: 0x1
         maximum: 0xff
         description: |
-          Dynamic address to be assigned to this device. This property is only
-          valid if the I3C device has a static address (first cell of the reg
-          property != 0).
+          Dynamic address to be assigned to this device. In case static address is
+          present (first cell of the reg property != 0), this address is assigned
+          through SETDASA. If static address is not present, this address is assigned
+          through SETNEWDA after assigning a temporary address via ENTDAA.
 
     required:
       - reg
@@ -163,12 +164,18 @@ examples:
             pagesize = <0x8>;
         };
 
-        /* I3C device with a static I2C address. */
+        /* I3C device with a static I2C address and assigned address. */
         thermal_sensor: sensor@68,39200144004 {
             reg = <0x68 0x392 0x144004>;
             assigned-address = <0xa>;
         };
 
+        /* I3C device with only assigned address. */
+        pressure_sensor: sensor@0,39200124004 {
+            reg = <0x0 0x392 0x124004>;
+            assigned-address = <0xc>;
+        };
+
         /*
          * I3C device without a static I2C address but requiring
          * resources described in the DT.
index 9ddba7f..5b1769c 100644 (file)
@@ -4,14 +4,14 @@
 $id: http://devicetree.org/schemas/input/azoteq,iqs7222.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Azoteq IQS7222A/B/C Capacitive Touch Controller
+title: Azoteq IQS7222A/B/C/D Capacitive Touch Controller
 
 maintainers:
   - Jeff LaBundy <jeff@labundy.com>
 
 description: |
-  The Azoteq IQS7222A, IQS7222B and IQS7222C are multichannel capacitive touch
-  controllers that feature additional sensing capabilities.
+  The Azoteq IQS7222A, IQS7222B, IQS7222C and IQS7222D are multichannel
+  capacitive touch controllers that feature additional sensing capabilities.
 
   Link to datasheets: https://www.azoteq.com/
 
@@ -21,6 +21,7 @@ properties:
       - azoteq,iqs7222a
       - azoteq,iqs7222b
       - azoteq,iqs7222c
+      - azoteq,iqs7222d
 
   reg:
     maxItems: 1
@@ -173,6 +174,152 @@ properties:
     maximum: 3000
     description: Specifies the report rate (in ms) during ultra-low-power mode.
 
+  touchscreen-size-x: true
+  touchscreen-size-y: true
+  touchscreen-inverted-x: true
+  touchscreen-inverted-y: true
+  touchscreen-swapped-x-y: true
+
+  trackpad:
+    type: object
+    description: Represents all channels associated with the trackpad.
+
+    properties:
+      azoteq,channel-select:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 12
+        items:
+          minimum: 0
+          maximum: 13
+        description:
+          Specifies the order of the channels that participate in the trackpad.
+          Specify 255 to omit a given channel for the purpose of mapping a non-
+          rectangular trackpad.
+
+      azoteq,num-rows:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 12
+        description: Specifies the number of rows that comprise the trackpad.
+
+      azoteq,num-cols:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 12
+        description: Specifies the number of columns that comprise the trackpad.
+
+      azoteq,top-speed:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        multipleOf: 4
+        minimum: 0
+        maximum: 1020
+        description:
+          Specifies the speed (in coordinates traveled per conversion) after
+          which coordinate filtering is no longer applied.
+
+      azoteq,bottom-speed:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description:
+          Specifies the speed (in coordinates traveled per conversion) after
+          which coordinate filtering is linearly reduced.
+
+      azoteq,use-prox:
+        type: boolean
+        description:
+          Directs the trackpad to respond to the proximity states of the
+          selected channels instead of their corresponding touch states.
+          Note the trackpad cannot report granular coordinates during a
+          state of proximity.
+
+    patternProperties:
+      "^azoteq,lower-cal-(x|y)$":
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's lower starting points.
+
+      "^azoteq,upper-cal-(x|y)$":
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's upper starting points.
+
+      "^event-(press|tap|(swipe|flick)-(x|y)-(pos|neg))$":
+        type: object
+        $ref: input.yaml#
+        description:
+          Represents a press or gesture event reported by the trackpad. Specify
+          'linux,code' under the press event to report absolute coordinates.
+
+        properties:
+          linux,code: true
+
+          azoteq,gesture-angle-tighten:
+            type: boolean
+            description:
+              Limits the tangent of the gesture angle to 0.5 (axial gestures
+              only). If specified in one direction, the effect is applied in
+              either direction.
+
+          azoteq,gesture-max-ms:
+            multipleOf: 16
+            minimum: 0
+            maximum: 4080
+            description:
+              Specifies the length of time (in ms) within which a tap, swipe
+              or flick gesture must be completed in order to be acknowledged
+              by the device. The number specified for any one swipe or flick
+              gesture applies to all other swipe or flick gestures.
+
+          azoteq,gesture-min-ms:
+            multipleOf: 16
+            minimum: 0
+            maximum: 4080
+            description:
+              Specifies the length of time (in ms) for which a tap gesture must
+              be held in order to be acknowledged by the device.
+
+          azoteq,gesture-dist:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 65535
+            description:
+              Specifies the distance (in coordinates) across which a swipe or
+              flick gesture must travel in order to be acknowledged by the
+              device. The number specified for any one swipe or flick gesture
+              applies to all remaining swipe or flick gestures.
+
+              For tap gestures, this property specifies the distance from the
+              original point of contact across which the contact is permitted
+              to travel before the gesture is rejected by the device.
+
+          azoteq,gpio-select:
+            $ref: /schemas/types.yaml#/definitions/uint32-array
+            minItems: 1
+            maxItems: 3
+            items:
+              minimum: 0
+              maximum: 2
+            description: |
+              Specifies one or more GPIO mapped to the event as follows:
+              0: GPIO0
+              1: GPIO3
+              2: GPIO4
+
+              Note that although multiple events can be mapped to a single
+              GPIO, they must all be of the same type (proximity, touch or
+              trackpad gesture).
+
+        additionalProperties: false
+
+    required:
+      - azoteq,channel-select
+
+    additionalProperties: false
+
 patternProperties:
   "^cycle-[0-9]$":
     type: object
@@ -288,6 +435,10 @@ patternProperties:
           Activates the reference channel in response to proximity events
           instead of touch events.
 
+      azoteq,counts-filt-enable:
+        type: boolean
+        description: Applies counts filtering to the channel.
+
       azoteq,ati-band:
         $ref: /schemas/types.yaml#/definitions/uint32
         enum: [0, 1, 2, 3]
@@ -432,12 +583,12 @@ patternProperties:
             description: |
               Specifies one or more GPIO mapped to the event as follows:
               0: GPIO0
-              1: GPIO3 (IQS7222C only)
-              2: GPIO4 (IQS7222C only)
+              1: GPIO3
+              2: GPIO4
 
               Note that although multiple events can be mapped to a single
               GPIO, they must all be of the same type (proximity, touch or
-              slider gesture).
+              slider/trackpad gesture).
 
           azoteq,thresh:
             $ref: /schemas/types.yaml#/definitions/uint32
@@ -521,16 +672,16 @@ patternProperties:
         minimum: 0
         maximum: 65535
         description:
-          Specifies the speed of movement after which coordinate filtering is
-          no longer applied.
+          Specifies the speed (in coordinates traveled per conversion) after
+          which coordinate filtering is no longer applied.
 
       azoteq,bottom-speed:
         $ref: /schemas/types.yaml#/definitions/uint32
         minimum: 0
         maximum: 255
         description:
-          Specifies the speed of movement after which coordinate filtering is
-          linearly reduced.
+          Specifies the speed (in coordinates traveled per conversion) after
+          which coordinate filtering is linearly reduced.
 
       azoteq,bottom-beta:
         $ref: /schemas/types.yaml#/definitions/uint32
@@ -595,10 +746,10 @@ patternProperties:
             minimum: 0
             maximum: 4080
             description:
-              Specifies the distance across which a swipe or flick gesture must
-              travel in order to be acknowledged by the device. The number spec-
-              ified for any one swipe or flick gesture applies to all remaining
-              swipe or flick gestures.
+              Specifies the distance (in coordinates) across which a swipe or
+              flick gesture must travel in order to be acknowledged by the
+              device. The number specified for any one swipe or flick gesture
+              applies to all remaining swipe or flick gestures.
 
           azoteq,gpio-select:
             $ref: /schemas/types.yaml#/definitions/uint32-array
@@ -610,8 +761,8 @@ patternProperties:
             description: |
               Specifies one or more GPIO mapped to the event as follows:
               0: GPIO0
-              1: GPIO3 (IQS7222C only)
-              2: GPIO4 (IQS7222C only)
+              1: GPIO3
+              2: GPIO4
 
               Note that although multiple events can be mapped to a single
               GPIO, they must all be of the same type (proximity, touch or
@@ -629,8 +780,8 @@ patternProperties:
     description: |
       Represents a GPIO mapped to one or more events as follows:
       gpio-0: GPIO0
-      gpio-1: GPIO3 (IQS7222C only)
-      gpio-2: GPIO4 (IQS7222C only)
+      gpio-1: GPIO3
+      gpio-2: GPIO4
 
     allOf:
       - $ref: ../pinctrl/pincfg-node.yaml#
@@ -641,11 +792,53 @@ patternProperties:
     additionalProperties: false
 
 allOf:
+  - $ref: touchscreen/touchscreen.yaml#
+
   - if:
       properties:
         compatible:
           contains:
-            const: azoteq,iqs7222b
+            enum:
+              - azoteq,iqs7222a
+              - azoteq,iqs7222b
+              - azoteq,iqs7222c
+
+    then:
+      properties:
+        touchscreen-size-x: false
+        touchscreen-size-y: false
+        touchscreen-inverted-x: false
+        touchscreen-inverted-y: false
+        touchscreen-swapped-x-y: false
+
+        trackpad: false
+
+      patternProperties:
+        "^channel-([0-9]|1[0-9])$":
+          properties:
+            azoteq,counts-filt-enable: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - azoteq,iqs7222b
+              - azoteq,iqs7222c
+
+    then:
+      patternProperties:
+        "^channel-([0-9]|1[0-9])$":
+          properties:
+            azoteq,ulp-allow: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - azoteq,iqs7222b
+              - azoteq,iqs7222d
 
     then:
       patternProperties:
@@ -657,13 +850,22 @@ allOf:
           properties:
             azoteq,ref-select: false
 
+        "^slider-[0-1]$": false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: azoteq,iqs7222b
+
+    then:
+      patternProperties:
+        "^channel-([0-9]|1[0-9])$":
           patternProperties:
             "^event-(prox|touch)$":
               properties:
                 azoteq,gpio-select: false
 
-        "^slider-[0-1]$": false
-
         "^gpio-[0-2]$": false
 
   - if:
@@ -704,10 +906,6 @@ allOf:
 
     else:
       patternProperties:
-        "^channel-([0-9]|1[0-9])$":
-          properties:
-            azoteq,ulp-allow: false
-
         "^slider-[0-1]$":
           patternProperties:
             "^event-(press|tap|(swipe|flick)-(pos|neg))$":
diff --git a/Documentation/devicetree/bindings/input/stmpe-keypad.txt b/Documentation/devicetree/bindings/input/stmpe-keypad.txt
deleted file mode 100644 (file)
index 12bb771..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-* STMPE Keypad
-
-Required properties:
- - compatible               : "st,stmpe-keypad"
- - linux,keymap             : See ./matrix-keymap.txt
-
-Optional properties:
- - debounce-interval        : Debouncing interval time in milliseconds
- - st,scan-count            : Scanning cycles elapsed before key data is updated
- - st,no-autorepeat         : If specified device will not autorepeat
- - keypad,num-rows          : See ./matrix-keymap.txt
- - keypad,num-columns       : See ./matrix-keymap.txt
-
-Example:
-
-       stmpe_keypad {
-               compatible = "st,stmpe-keypad";
-
-               debounce-interval = <64>;
-               st,scan-count = <8>;
-               st,no-autorepeat;
-
-               linux,keymap = <0x205006b
-                               0x4010074
-                               0x3050072
-                               0x1030004
-                               0x502006a
-                               0x500000a
-                               0x5008b
-                               0x706001c
-                               0x405000b
-                               0x6070003
-                               0x3040067
-                               0x303006c
-                               0x60400e7
-                               0x602009e
-                               0x4020073
-                               0x5050002
-                               0x4030069
-                               0x3020008>;
-       };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs7211.yaml b/Documentation/devicetree/bindings/input/touchscreen/azoteq,iqs7211.yaml
new file mode 100644 (file)
index 0000000..8cf371b
--- /dev/null
@@ -0,0 +1,769 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/azoteq,iqs7211.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller
+
+maintainers:
+  - Jeff LaBundy <jeff@labundy.com>
+
+description: |
+  The Azoteq IQS7210A, IQS7211A and IQS7211E trackpad and touchscreen
+  controllers employ projected-capacitance sensing and can track two contacts.
+
+  Link to datasheets: https://www.azoteq.com/
+
+properties:
+  compatible:
+    enum:
+      - azoteq,iqs7210a
+      - azoteq,iqs7211a
+      - azoteq,iqs7211e
+
+  reg:
+    maxItems: 1
+
+  irq-gpios:
+    maxItems: 1
+    description:
+      Specifies the GPIO connected to the device's active-low RDY output. The
+      pin doubles as the IQS7211E's active-low MCLR input, in which case this
+      GPIO must be configured as open-drain.
+
+  reset-gpios:
+    maxItems: 1
+    description:
+      Specifies the GPIO connected to the device's active-low MCLR input. The
+      device is temporarily held in hardware reset prior to initialization if
+      this property is present.
+
+  azoteq,forced-comms:
+    type: boolean
+    description:
+      Enables forced communication; to be used with host adapters that cannot
+      tolerate clock stretching.
+
+  azoteq,forced-comms-default:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1]
+    description:
+      Indicates if the device's OTP memory enables (1) or disables (0) forced
+      communication by default. Specifying this property can expedite startup
+      time if the default value is known.
+
+      If this property is not specified, communication is not initiated until
+      the device asserts its RDY pin shortly after exiting hardware reset. At
+      that point, forced communication is either enabled or disabled based on
+      the presence or absence of the 'azoteq,forced-comms' property.
+
+  azoteq,rate-active-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during active mode.
+
+  azoteq,rate-touch-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during idle-touch mode.
+
+  azoteq,rate-idle-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during idle mode.
+
+  azoteq,rate-lp1-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during low-power mode 1.
+
+  azoteq,rate-lp2-ms:
+    minimum: 0
+    maximum: 65535
+    description: Specifies the report rate (in ms) during low-power mode 2.
+
+  azoteq,timeout-active-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 65535000
+    description:
+      Specifies the length of time (in ms) to wait for an event before moving
+      from active mode to idle or idle-touch modes.
+
+  azoteq,timeout-touch-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 65535000
+    description:
+      Specifies the length of time (in ms) to wait for an event before moving
+      from idle-touch mode to idle mode.
+
+  azoteq,timeout-idle-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 65535000
+    description:
+      Specifies the length of time (in ms) to wait for an event before moving
+      from idle mode to low-power mode 1.
+
+  azoteq,timeout-lp1-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 65535000
+    description:
+      Specifies the length of time (in ms) to wait for an event before moving
+      from low-power mode 1 to low-power mode 2.
+
+  azoteq,timeout-lp2-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 60000
+    description:
+      Specifies the rate (in ms) at which the trackpad reference values
+      are updated during low-power modes 1 and 2.
+
+  azoteq,timeout-ati-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 60000
+    description:
+      Specifies the delay (in ms) before the automatic tuning implementation
+      (ATI) is retried in the event it fails to complete.
+
+  azoteq,timeout-comms-ms:
+    minimum: 0
+    maximum: 65535
+    description:
+      Specifies the delay (in ms) before a communication window is closed.
+
+  azoteq,timeout-press-ms:
+    multipleOf: 1000
+    minimum: 0
+    maximum: 60000
+    description:
+      Specifies the length of time (in ms) to wait before automatically
+      releasing a press event. Specify zero to allow the press state to
+      persist indefinitely.
+
+  azoteq,fosc-freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1]
+    description: |
+      Specifies the device's core clock frequency as follows:
+      0: 14 MHz
+      1: 18 MHz
+
+  azoteq,fosc-trim:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 15
+    description: Specifies the device's core clock frequency trim.
+
+  azoteq,num-contacts:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 2
+    default: 0
+    description: Specifies the number of contacts reported by the device.
+
+  azoteq,contact-split:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 255
+    description: Specifies the contact (finger) split factor.
+
+  azoteq,trim-x:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 255
+    description: Specifies the horizontal trim width.
+
+  azoteq,trim-y:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 255
+    description: Specifies the vertical trim height.
+
+  trackpad:
+    type: object
+    description: Represents all channels associated with the trackpad.
+
+    properties:
+      azoteq,rx-enable:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 8
+        items:
+          minimum: 0
+          maximum: 7
+        description:
+          Specifies the order of the CRx pin(s) associated with the trackpad.
+
+      azoteq,tx-enable:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 12
+        items:
+          minimum: 0
+          maximum: 11
+        description:
+          Specifies the order of the CTx pin(s) associated with the trackpad.
+
+      azoteq,channel-select:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 36
+        items:
+          minimum: 0
+          maximum: 255
+        description: |
+          Specifies the channels mapped to each cycle in the following order:
+          Cycle 0, slot 0
+          Cycle 0, slot 1
+          Cycle 1, slot 0
+          Cycle 1, slot 1
+          ...and so on. Specify 255 to disable a given slot.
+
+      azoteq,ati-frac-div-fine:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the trackpad's ATI fine fractional divider.
+
+      azoteq,ati-frac-mult-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 15
+        description: Specifies the trackpad's ATI coarse fractional multiplier.
+
+      azoteq,ati-frac-div-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the trackpad's ATI coarse fractional divider.
+
+      azoteq,ati-comp-div:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the trackpad's ATI compensation divider.
+
+      azoteq,ati-target:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the trackpad's ATI target.
+
+      azoteq,touch-enter:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's touch entrance factor.
+
+      azoteq,touch-exit:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's touch exit factor.
+
+      azoteq,thresh:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's stationary touch threshold.
+
+      azoteq,conv-period:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's conversion period.
+
+      azoteq,conv-frac:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the trackpad's conversion frequency fraction.
+
+    patternProperties:
+      "^event-(tap(-double|-triple)?|hold|palm|swipe-(x|y)-(pos|neg)(-hold)?)$":
+        type: object
+        $ref: ../input.yaml#
+        description:
+          Represents a gesture event reported by the trackpad. In the case of
+          axial gestures, the duration or distance specified in one direction
+          applies to both directions along the same axis.
+
+        properties:
+          linux,code: true
+
+          azoteq,gesture-max-ms:
+            minimum: 0
+            maximum: 65535
+            description: Specifies the maximum duration of tap/swipe gestures.
+
+          azoteq,gesture-mid-ms:
+            minimum: 0
+            maximum: 65535
+            description:
+              Specifies the maximum duration between subsequent tap gestures
+              (IQS7211E only).
+
+          azoteq,gesture-min-ms:
+            minimum: 0
+            maximum: 65535
+            description: Specifies the minimum duration of hold gestures.
+
+          azoteq,gesture-dist:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 65535
+            description:
+              Specifies the minimum (swipe) or maximum (tap and hold) distance
+              a finger may travel to be considered a gesture.
+
+          azoteq,gesture-dist-rep:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 65535
+            description:
+              Specifies the minimum distance a finger must travel to elicit a
+              repeated swipe gesture (IQS7211E only).
+
+          azoteq,gesture-angle:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 75
+            description:
+              Specifies the maximum angle (in degrees) a finger may travel to
+              be considered a swipe gesture.
+
+          azoteq,thresh:
+            $ref: /schemas/types.yaml#/definitions/uint32
+            minimum: 0
+            maximum: 42
+            description: Specifies the palm gesture threshold (IQS7211E only).
+
+        additionalProperties: false
+
+    dependencies:
+      azoteq,rx-enable: ["azoteq,tx-enable"]
+      azoteq,tx-enable: ["azoteq,rx-enable"]
+      azoteq,channel-select: ["azoteq,rx-enable"]
+
+    additionalProperties: false
+
+  alp:
+    type: object
+    $ref: ../input.yaml#
+    description: Represents the alternate low-power channel (ALP).
+
+    properties:
+      azoteq,rx-enable:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 8
+        items:
+          minimum: 0
+          maximum: 7
+        description:
+          Specifies the CRx pin(s) associated with the ALP in no particular
+          order.
+
+      azoteq,tx-enable:
+        $ref: /schemas/types.yaml#/definitions/uint32-array
+        minItems: 1
+        maxItems: 12
+        items:
+          minimum: 0
+          maximum: 11
+        description:
+          Specifies the CTx pin(s) associated with the ALP in no particular
+          order.
+
+      azoteq,ati-frac-div-fine:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the ALP's ATI fine fractional divider.
+
+      azoteq,ati-frac-mult-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 15
+        description: Specifies the ALP's ATI coarse fractional multiplier.
+
+      azoteq,ati-frac-div-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the ALP's ATI coarse fractional divider.
+
+      azoteq,ati-comp-div:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the ALP's ATI compensation divider.
+
+      azoteq,ati-target:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the ALP's ATI target.
+
+      azoteq,ati-base:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        multipleOf: 8
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's ATI base.
+
+      azoteq,ati-mode:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1]
+        description: |
+          Specifies the ALP's ATI mode as follows:
+          0: Partial
+          1: Full
+
+      azoteq,sense-mode:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1]
+        description: |
+          Specifies the ALP's sensing mode as follows:
+          0: Self capacitive
+          1: Mutual capacitive
+
+      azoteq,debounce-enter:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's debounce entrance factor.
+
+      azoteq,debounce-exit:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's debounce exit factor.
+
+      azoteq,thresh:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the ALP's proximity or touch threshold.
+
+      azoteq,conv-period:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's conversion period.
+
+      azoteq,conv-frac:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the ALP's conversion frequency fraction.
+
+      linux,code: true
+
+    additionalProperties: false
+
+  button:
+    type: object
+    description: Represents the inductive or capacitive button.
+
+    properties:
+      azoteq,ati-frac-div-fine:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the button's ATI fine fractional divider.
+
+      azoteq,ati-frac-mult-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 15
+        description: Specifies the button's ATI coarse fractional multiplier.
+
+      azoteq,ati-frac-div-coarse:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the button's ATI coarse fractional divider.
+
+      azoteq,ati-comp-div:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 31
+        description: Specifies the button's ATI compensation divider.
+
+      azoteq,ati-target:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the button's ATI target.
+
+      azoteq,ati-base:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        multipleOf: 8
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's ATI base.
+
+      azoteq,ati-mode:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1]
+        description: |
+          Specifies the button's ATI mode as follows:
+          0: Partial
+          1: Full
+
+      azoteq,sense-mode:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1, 2]
+        description: |
+          Specifies the button's sensing mode as follows:
+          0: Self capacitive
+          1: Mutual capacitive
+          2: Inductive
+
+      azoteq,touch-enter:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's touch entrance factor.
+
+      azoteq,touch-exit:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's touch exit factor.
+
+      azoteq,debounce-enter:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's debounce entrance factor.
+
+      azoteq,debounce-exit:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's debounce exit factor.
+
+      azoteq,thresh:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 65535
+        description: Specifies the button's proximity threshold.
+
+      azoteq,conv-period:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's conversion period.
+
+      azoteq,conv-frac:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 255
+        description: Specifies the button's conversion frequency fraction.
+
+    patternProperties:
+      "^event-(prox|touch)$":
+        type: object
+        $ref: ../input.yaml#
+        description:
+          Represents a proximity or touch event reported by the button.
+
+        properties:
+          linux,code: true
+
+        additionalProperties: false
+
+    additionalProperties: false
+
+  wakeup-source: true
+
+  touchscreen-size-x: true
+  touchscreen-size-y: true
+  touchscreen-inverted-x: true
+  touchscreen-inverted-y: true
+  touchscreen-swapped-x-y: true
+
+dependencies:
+  touchscreen-size-x: ["azoteq,num-contacts"]
+  touchscreen-size-y: ["azoteq,num-contacts"]
+  touchscreen-inverted-x: ["azoteq,num-contacts"]
+  touchscreen-inverted-y: ["azoteq,num-contacts"]
+  touchscreen-swapped-x-y: ["azoteq,num-contacts"]
+
+required:
+  - compatible
+  - reg
+  - irq-gpios
+
+additionalProperties: false
+
+allOf:
+  - $ref: touchscreen.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: azoteq,iqs7210a
+
+    then:
+      properties:
+        alp:
+          properties:
+            azoteq,rx-enable:
+              maxItems: 4
+              items:
+                minimum: 4
+
+    else:
+      properties:
+        azoteq,timeout-press-ms: false
+
+        alp:
+          properties:
+            azoteq,ati-mode: false
+
+        button: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: azoteq,iqs7211e
+
+    then:
+      properties:
+        reset-gpios: false
+
+        trackpad:
+          properties:
+            azoteq,tx-enable:
+              maxItems: 13
+              items:
+                maximum: 12
+
+        alp:
+          properties:
+            azoteq,tx-enable:
+              maxItems: 13
+              items:
+                maximum: 12
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/input/input.h>
+
+    i2c {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            touch@56 {
+                    compatible = "azoteq,iqs7210a";
+                    reg = <0x56>;
+                    irq-gpios = <&gpio 4 GPIO_ACTIVE_LOW>;
+                    reset-gpios = <&gpio 17 (GPIO_ACTIVE_LOW |
+                                             GPIO_PUSH_PULL)>;
+                    azoteq,num-contacts = <2>;
+
+                    trackpad {
+                            azoteq,rx-enable = <6>, <5>, <4>, <3>, <2>;
+                            azoteq,tx-enable = <1>, <7>, <8>, <9>, <10>;
+                    };
+
+                    button {
+                            azoteq,sense-mode = <2>;
+                            azoteq,touch-enter = <40>;
+                            azoteq,touch-exit = <36>;
+
+                            event-touch {
+                                    linux,code = <KEY_HOME>;
+                            };
+                    };
+
+                    alp {
+                            azoteq,sense-mode = <1>;
+                            linux,code = <KEY_POWER>;
+                    };
+            };
+    };
+
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/input/input.h>
+
+    i2c {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            touch@56 {
+                    compatible = "azoteq,iqs7211e";
+                    reg = <0x56>;
+                    irq-gpios = <&gpio 4 (GPIO_ACTIVE_LOW |
+                                          GPIO_OPEN_DRAIN)>;
+
+                    trackpad {
+                            event-tap {
+                                    linux,code = <KEY_PLAYPAUSE>;
+                            };
+
+                            event-tap-double {
+                                    linux,code = <KEY_SHUFFLE>;
+                            };
+
+                            event-tap-triple {
+                                    linux,code = <KEY_AGAIN>;
+                            };
+
+                            event-hold {
+                                    linux,code = <KEY_STOP>;
+                            };
+
+                            event-palm {
+                                    linux,code = <KEY_EXIT>;
+                            };
+
+                            event-swipe-x-pos {
+                                    linux,code = <KEY_REWIND>;
+                            };
+
+                            event-swipe-x-pos-hold {
+                                    linux,code = <KEY_PREVIOUS>;
+                            };
+
+                            event-swipe-x-neg {
+                                    linux,code = <KEY_FASTFORWARD>;
+                            };
+
+                            event-swipe-x-neg-hold {
+                                    linux,code = <KEY_NEXT>;
+                            };
+
+                            event-swipe-y-pos {
+                                    linux,code = <KEY_VOLUMEUP>;
+                            };
+
+                            event-swipe-y-pos-hold {
+                                    linux,code = <KEY_MUTE>;
+                            };
+
+                            event-swipe-y-neg {
+                                    linux,code = <KEY_VOLUMEDOWN>;
+                            };
+
+                            event-swipe-y-neg-hold {
+                                    linux,code = <KEY_MUTE>;
+                            };
+                    };
+            };
+    };
+
+...
index ef4c841..f2808cb 100644 (file)
@@ -93,6 +93,12 @@ properties:
     minimum: 1
     maximum: 255
 
+  threshold:
+    description: Allows setting the "click"-threshold in the range from 0 to 255.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 255
+
   touchscreen-size-x: true
   touchscreen-size-y: true
   touchscreen-fuzz-x: true
index 007adbc..9dc25d3 100644 (file)
@@ -24,6 +24,8 @@ properties:
     maxItems: 1
   reset-gpios:
     maxItems: 1
+  vdd-supply:
+    description: Power supply regulator for the chip
   touchscreen-size-x: true
   touchscreen-size-y: true
   touchscreen-inverted-x: true
index fdd0289..07f9dd6 100644 (file)
@@ -52,6 +52,11 @@ properties:
   touchscreen-swapped-x-y: true
   touchscreen-max-pressure: true
 
+  linux,keycodes:
+    description: Keycodes for the touch keys
+    minItems: 1
+    maxItems: 15
+
 additionalProperties: false
 
 required:
diff --git a/Documentation/devicetree/bindings/input/touchscreen/stmpe.txt b/Documentation/devicetree/bindings/input/touchscreen/stmpe.txt
deleted file mode 100644 (file)
index 238b515..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-STMPE Touchscreen
-----------------
-
-Required properties:
- - compatible: "st,stmpe-ts"
-
-Optional properties:
-- st,ave-ctrl          : Sample average control
-                               0 -> 1 sample
-                               1 -> 2 samples
-                               2 -> 4 samples
-                               3 -> 8 samples
-- st,touch-det-delay   : Touch detect interrupt delay (recommended is 3)
-                               0 -> 10 us
-                               1 -> 50 us
-                               2 -> 100 us
-                               3 -> 500 us
-                               4 -> 1 ms
-                               5 -> 5 ms
-                               6 -> 10 ms
-                               7 -> 50 ms
-- st,settling          : Panel driver settling time (recommended is 2)
-                               0 -> 10 us
-                               1 -> 100 us
-                               2 -> 500 us
-                               3 -> 1 ms
-                               4 -> 5 ms
-                               5 -> 10 ms
-                               6 -> 50 ms
-                               7 -> 100 ms
-- st,fraction-z                : Length of the fractional part in z (recommended is 7)
-                         (fraction-z ([0..7]) = Count of the fractional part)
-- st,i-drive           : current limit value of the touchscreen drivers
-                               0 -> 20 mA (typical 35mA max)
-                               1 -> 50 mA (typical 80 mA max)
-
-Optional properties common with MFD (deprecated):
- - st,sample-time      : ADC conversion time in number of clock.
-                               0 -> 36 clocks
-                               1 -> 44 clocks
-                               2 -> 56 clocks
-                               3 -> 64 clocks
-                               4 -> 80 clocks (recommended)
-                               5 -> 96 clocks
-                               6 -> 124 clocks
- - st,mod-12b          : ADC Bit mode
-                               0 -> 10bit ADC
-                               1 -> 12bit ADC
- - st,ref-sel          : ADC reference source
-                               0 -> internal
-                               1 -> external
- - st,adc-freq         : ADC Clock speed
-                               0 -> 1.625 MHz
-                               1 -> 3.25 MHz
-                               2 || 3 -> 6.5 MHz
-
-Node should be child node of stmpe node to which it belongs.
-
-Note that common ADC settings of stmpe_touchscreen (child) will take precedence
-over the settings done in MFD.
-
-Example:
-
-stmpe811@41 {
-       compatible = "st,stmpe811";
-       pinctrl-names = "default";
-       pinctrl-0 = <&pinctrl_touch_int>;
-       #address-cells = <1>;
-       #size-cells = <0>;
-       reg = <0x41>;
-       interrupts = <10 IRQ_TYPE_LEVEL_LOW>;
-       interrupt-parent = <&gpio4>;
-       interrupt-controller;
-       id = <0>;
-       blocks = <0x5>;
-       irq-trigger = <0x1>;
-       /* Common ADC settings */
-       /* 3.25 MHz ADC clock speed */
-       st,adc-freq = <1>;
-       /* 12-bit ADC */
-       st,mod-12b = <1>;
-       /* internal ADC reference */
-       st,ref-sel = <0>;
-       /* ADC converstion time: 80 clocks */
-       st,sample-time = <4>;
-
-       stmpe_touchscreen {
-               compatible = "st,stmpe-ts";
-               reg = <0>;
-               /* 8 sample average control */
-               st,ave-ctrl = <3>;
-               /* 5 ms touch detect interrupt delay */
-               st,touch-det-delay = <5>;
-               /* 1 ms panel driver settling time */
-               st,settling = <3>;
-               /* 7 length fractional part in z */
-               st,fraction-z = <7>;
-               /*
-                * 50 mA typical 80 mA max touchscreen drivers
-                * current limit value
-                */
-               st,i-drive = <1>;
-       };
-       stmpe_adc {
-               compatible = "st,stmpe-adc";
-               st,norequest-mask = <0x0F>;
-       };
-};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt b/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt
deleted file mode 100644 (file)
index 977d7ed..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-STMicroelectronics STi System Configuration Controlled IRQs
------------------------------------------------------------
-
-On STi based systems; External, CTI (Core Sight), PMU (Performance Management),
-and PL310 L2 Cache IRQs are controlled using System Configuration registers.
-This driver is used to unmask them prior to use.
-
-Required properties:
-- compatible   : Should be "st,stih407-irq-syscfg"
-- st,syscfg    : Phandle to Cortex-A9 IRQ system config registers
-- st,irq-device        : Array of IRQs to enable - should be 2 in length
-- st,fiq-device        : Array of FIQs to enable - should be 2 in length
-
-Optional properties:
-- st,invert-ext        : External IRQs can be inverted at will.  This property inverts
-                 these IRQs using bitwise logic.  A number of defines have been
-                 provided for convenience:
-                       ST_IRQ_SYSCFG_EXT_1_INV
-                       ST_IRQ_SYSCFG_EXT_2_INV
-                       ST_IRQ_SYSCFG_EXT_3_INV
-Example:
-
-irq-syscfg {
-       compatible    = "st,stih407-irq-syscfg";
-       st,syscfg     = <&syscfg_cpu>;
-       st,irq-device = <ST_IRQ_SYSCFG_PMU_0>,
-                       <ST_IRQ_SYSCFG_PMU_1>;
-       st,fiq-device = <ST_IRQ_SYSCFG_DISABLED>,
-                       <ST_IRQ_SYSCFG_DISABLED>;
-};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stih407-irq-syscfg.yaml b/Documentation/devicetree/bindings/interrupt-controller/st,stih407-irq-syscfg.yaml
new file mode 100644 (file)
index 0000000..2b153d7
--- /dev/null
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/st,stih407-irq-syscfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STi System Configuration Controlled IRQs
+
+maintainers:
+  - Patrice Chotard <patrice.chotard@foss.st.com>
+
+description:
+  On STi based systems; External, CTI (Core Sight), PMU (Performance
+  Management), and PL310 L2 Cache IRQs are controlled using System
+  Configuration registers.  This device is used to unmask them prior to use.
+
+properties:
+  compatible:
+    const: st,stih407-irq-syscfg
+
+  st,syscfg:
+    description: Phandle to Cortex-A9 IRQ system config registers
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  st,irq-device:
+    description: Array of IRQs to enable.
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    items:
+      - description: Enable the IRQ of the channel one.
+      - description: Enable the IRQ of the channel two.
+
+  st,fiq-device:
+    description: Array of FIQs to enable.
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    items:
+      - description: Enable the FIQ of the channel one.
+      - description: Enable the FIQ of the channel two.
+
+  st,invert-ext:
+    description: External IRQs can be inverted at will. This property inverts
+      these three IRQs using bitwise logic, each one being encoded respectively
+      on the first, second and third bit (values 1, 2 and 4).
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 1, 2, 3, 4, 5, 6 ]
+
+required:
+  - compatible
+  - st,syscfg
+  - st,irq-device
+  - st,fiq-device
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq-st.h>
+    irq-syscfg {
+        compatible    = "st,stih407-irq-syscfg";
+        st,syscfg     = <&syscfg_cpu>;
+        st,irq-device = <ST_IRQ_SYSCFG_PMU_0>,
+                        <ST_IRQ_SYSCFG_PMU_1>;
+        st,fiq-device = <ST_IRQ_SYSCFG_DISABLED>,
+                        <ST_IRQ_SYSCFG_DISABLED>;
+    };
+...
index 58b492d..5fb7007 100644 (file)
@@ -83,8 +83,7 @@ properties:
       - enum:
             # LED will act as a back-light, controlled by the framebuffer system
           - backlight
-            # LED will turn on (but for leds-gpio see "default-state" property in
-            # Documentation/devicetree/bindings/leds/leds-gpio.yaml)
+            # LED will turn on (see also "default-state" property)
           - default-on
             # LED "double" flashes at a load average based rate
           - heartbeat
@@ -158,6 +157,18 @@ properties:
       For flash LED controllers with configurable current this property is
       mandatory for the LEDs in the non-flash modes (e.g. torch or indicator).
 
+  max-brightness:
+    description:
+      Normally, the maximum brightness is determined by the hardware, and this
+      property is not required. This property is used to set a software limit.
+      It could happen that an LED is made so bright that it gets damaged or
+      causes damage due to restrictions in a specific system, such as mounting
+      conditions.
+      Note that this flag is mainly used for PWM-LEDs, where it is not possible
+      to map brightness to current. Drivers for other controllers should use
+      led-max-microamp.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
   panic-indicator:
     description:
       This property specifies that the LED should be used, if at all possible,
diff --git a/Documentation/devicetree/bindings/leds/leds-an30259a.txt b/Documentation/devicetree/bindings/leds/leds-an30259a.txt
deleted file mode 100644 (file)
index cbd8339..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-* Panasonic AN30259A 3-channel LED driver
-
-The AN30259A is a LED controller capable of driving three LEDs independently. It supports
-constant current output and sloping current output modes. The chip is connected over I2C.
-
-Required properties:
-       - compatible: Must be "panasonic,an30259a".
-       - reg: I2C slave address.
-       - #address-cells: Must be 1.
-       - #size-cells: Must be 0.
-
-Each LED is represented as a sub-node of the panasonic,an30259a node.
-
-Required sub-node properties:
-       - reg: Pin that the LED is connected to. Must be 1, 2, or 3.
-
-Optional sub-node properties:
-       - function :
-               see Documentation/devicetree/bindings/leds/common.txt
-       - color :
-               see Documentation/devicetree/bindings/leds/common.txt
-       - label :
-               see Documentation/devicetree/bindings/leds/common.txt (deprecated)
-       - linux,default-trigger :
-               see Documentation/devicetree/bindings/leds/common.txt
-
-Example:
-
-#include <dt-bindings/leds/common.h>
-
-led-controller@30 {
-       compatible = "panasonic,an30259a";
-       reg = <0x30>;
-       #address-cells = <1>;
-       #size-cells = <0>;
-
-       led@1 {
-               reg = <1>;
-               linux,default-trigger = "heartbeat";
-               function = LED_FUNCTION_INDICATOR;
-               color = <LED_COLOR_ID_RED>;
-       };
-
-       led@2 {
-               reg = <2>;
-               function = LED_FUNCTION_INDICATOR;
-               color = <LED_COLOR_ID_GREEN>;
-       };
-
-       led@3 {
-               reg = <3>;
-               function = LED_FUNCTION_INDICATOR;
-               color = <LED_COLOR_ID_BLUE>;
-       };
-};
index 08f3e1c..2623844 100644 (file)
@@ -20,9 +20,20 @@ properties:
   reg:
     maxItems: 1
 
+  interrupts:
+    maxItems: 1
+    description: Open-drain, low active interrupt pin "INTN".
+      Used to report completion of operations (power up, LED breath effects).
+
   vcc-supply:
     description: Regulator providing power to the "VCC" pin.
 
+  vio-supply:
+    description: Regulator providing power for pull-up of the I/O lines.
+      "VIO1" in the typical application circuit example of the datasheet.
+      Note that this regulator does not directly connect to AW2013, but is
+      needed for the correct operation of the interrupt and I2C lines.
+
   "#address-cells":
     const: 1
 
@@ -52,6 +63,7 @@ additionalProperties: false
 examples:
   - |
     #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
     #include <dt-bindings/leds/common.h>
 
     i2c {
@@ -61,6 +73,7 @@ examples:
         led-controller@45 {
             compatible = "awinic,aw2013";
             reg = <0x45>;
+            interrupts = <42 IRQ_TYPE_LEVEL_LOW>;
             #address-cells = <1>;
             #size-cells = <0>;
 
diff --git a/Documentation/devicetree/bindings/leds/leds-group-multicolor.yaml b/Documentation/devicetree/bindings/leds/leds-group-multicolor.yaml
new file mode 100644 (file)
index 0000000..8ed059a
--- /dev/null
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/leds-group-multicolor.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Multi-color LED built with monochromatic LEDs
+
+maintainers:
+  - Jean-Jacques Hiblot <jjhiblot@traphandler.com>
+
+description: |
+  This driver combines several monochromatic LEDs into one multi-color
+  LED using the multicolor LED class.
+
+properties:
+  compatible:
+    const: leds-group-multicolor
+
+  leds:
+    description:
+      An array of monochromatic LEDs
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+
+required:
+  - leds
+
+allOf:
+  - $ref: leds-class-multicolor.yaml#
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/leds/common.h>
+
+    monochromatic-leds {
+        compatible = "gpio-leds";
+
+        led0: led-0 {
+            gpios = <&mcu_pio 0 GPIO_ACTIVE_LOW>;
+            color = <LED_COLOR_ID_RED>;
+        };
+
+        led1: led-1 {
+            gpios = <&mcu_pio 1 GPIO_ACTIVE_HIGH>;
+            color = <LED_COLOR_ID_GREEN>;
+        };
+
+        led2: led-2 {
+            gpios = <&mcu_pio 2 GPIO_ACTIVE_HIGH>;
+            color = <LED_COLOR_ID_BLUE>;
+        };
+    };
+
+    multi-led {
+        compatible = "leds-group-multicolor";
+        color = <LED_COLOR_ID_RGB>;
+        function = LED_FUNCTION_INDICATOR;
+        leds = <&led0>, <&led1>, <&led2>;
+    };
+
+...
index edf6f55..9610bca 100644 (file)
@@ -29,6 +29,10 @@ properties:
 
   gpio-controller: true
 
+  gpio-line-names:
+    minItems: 1
+    maxItems: 16
+
   '#gpio-cells':
     const: 2
 
diff --git a/Documentation/devicetree/bindings/leds/nxp,pca995x.yaml b/Documentation/devicetree/bindings/leds/nxp,pca995x.yaml
new file mode 100644 (file)
index 0000000..654915c
--- /dev/null
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/nxp,pca995x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP PCA995x LED controllers
+
+maintainers:
+  - Isai Gaspar <isaiezequiel.gaspar@nxp.com>
+  - Marek Vasut <marex@denx.de>
+
+description:
+  The NXP PCA9952/PCA9955B are programmable LED controllers connected via I2C
+  that can drive 16 separate lines. Each of them can be individually switched
+  on and off, and brightness can be controlled via individual PWM.
+
+  Datasheets are available at
+  https://www.nxp.com/docs/en/data-sheet/PCA9952_PCA9955.pdf
+  https://www.nxp.com/docs/en/data-sheet/PCA9955B.pdf
+
+properties:
+  compatible:
+    enum:
+      - nxp,pca9952
+      - nxp,pca9955b
+
+  reg:
+    maxItems: 1
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^led@[0-9a-f]+$":
+    type: object
+    $ref: common.yaml#
+    unevaluatedProperties: false
+
+    properties:
+      reg:
+        minimum: 0
+        maximum: 15
+
+    required:
+      - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/leds/common.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        led-controller@1 {
+            compatible = "nxp,pca9955b";
+            reg = <0x01>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            led@0 {
+                reg = <0x0>;
+                color = <LED_COLOR_ID_RED>;
+                function = LED_FUNCTION_POWER;
+            };
+
+            led@2 {
+                reg = <0x2>;
+                color = <LED_COLOR_ID_WHITE>;
+                function = LED_FUNCTION_STATUS;
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/leds/panasonic,an30259a.yaml b/Documentation/devicetree/bindings/leds/panasonic,an30259a.yaml
new file mode 100644 (file)
index 0000000..e918dce
--- /dev/null
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/panasonic,an30259a.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Panasonic AN30259A 3-channel LED controller
+
+maintainers:
+  - Iskren Chernev <me@iskren.info>
+
+description:
+  The AN30259A is a LED controller capable of driving three LEDs independently.
+  It supports constant current output and sloping current output modes. The chip
+  is connected over I2C.
+
+properties:
+  compatible:
+    const: panasonic,an30259a
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^led@[1-3]$":
+    $ref: common.yaml#
+    unevaluatedProperties: false
+
+    properties:
+      reg:
+        enum: [ 1, 2, 3 ]
+
+required:
+  - compatible
+  - reg
+  - "#address-cells"
+  - "#size-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/leds/common.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        led-controller@30 {
+            compatible = "panasonic,an30259a";
+            reg = <0x30>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            led@1 {
+                reg = <1>;
+                linux,default-trigger = "heartbeat";
+                function = LED_FUNCTION_INDICATOR;
+                color = <LED_COLOR_ID_RED>;
+            };
+
+            led@2 {
+                reg = <2>;
+                function = LED_FUNCTION_INDICATOR;
+                color = <LED_COLOR_ID_GREEN>;
+            };
+
+            led@3 {
+                reg = <3>;
+                function = LED_FUNCTION_INDICATOR;
+                color = <LED_COLOR_ID_BLUE>;
+            };
+        };
+    };
+...
index 14700a2..44dd91a 100644 (file)
@@ -35,7 +35,7 @@ properties:
     description: GPIO pin to enable/disable the device.
 
 patternProperties:
-  "^led@[0-6]$":
+  "^led@[0-5]$":
     type: object
     $ref: common.yaml#
     unevaluatedProperties: false
@@ -43,7 +43,7 @@ patternProperties:
     properties:
       reg:
         minimum: 0
-        maximum: 6
+        maximum: 5
 
     required:
       - reg
index 58f0d94..b7a3ef7 100644 (file)
@@ -18,8 +18,6 @@ description: |
 
   The device has two LED outputs referred as GRNLED and AMBLED in data-sheet.
 
-select: false
-
 properties:
   compatible:
     const: rohm,bd71828-leds
diff --git a/Documentation/devicetree/bindings/media/i2c/ov5695.txt b/Documentation/devicetree/bindings/media/i2c/ov5695.txt
deleted file mode 100644 (file)
index 640a637..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-* Omnivision OV5695 MIPI CSI-2 sensor
-
-Required Properties:
-- compatible: shall be "ovti,ov5695"
-- clocks: reference to the xvclk input clock
-- clock-names: shall be "xvclk"
-- avdd-supply: Analog voltage supply, 2.8 volts
-- dovdd-supply: Digital I/O voltage supply, 1.8 volts
-- dvdd-supply: Digital core voltage supply, 1.2 volts
-- reset-gpios: Low active reset gpio
-
-The device node shall contain one 'port' child node with an
-'endpoint' subnode for its digital output video port,
-in accordance with the video interface bindings defined in
-Documentation/devicetree/bindings/media/video-interfaces.txt.
-The endpoint optional property 'data-lanes' shall be "<1 2>".
-
-Example:
-&i2c7 {
-       ov5695: camera-sensor@36 {
-               compatible = "ovti,ov5695";
-               reg = <0x36>;
-               pinctrl-names = "default";
-               pinctrl-0 = <&clk_24m_cam>;
-
-               clocks = <&cru SCLK_TESTCLKOUT1>;
-               clock-names = "xvclk";
-
-               avdd-supply = <&pp2800_cam>;
-               dovdd-supply = <&pp1800>;
-               dvdd-supply = <&pp1250_cam>;
-               reset-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>;
-
-               port {
-                       wcam_out: endpoint {
-                               remote-endpoint = <&mipi_in_wcam>;
-                               data-lanes = <1 2>;
-                       };
-               };
-       };
-};
diff --git a/Documentation/devicetree/bindings/media/i2c/ov7251.txt b/Documentation/devicetree/bindings/media/i2c/ov7251.txt
deleted file mode 100644 (file)
index 8281151..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-* Omnivision 1/7.5-Inch B&W VGA CMOS Digital Image Sensor
-
-The Omnivision OV7251 is a 1/7.5-Inch CMOS active pixel digital image sensor
-with an active array size of 640H x 480V. It is programmable through a serial
-I2C interface.
-
-Required Properties:
-- compatible: Value should be "ovti,ov7251".
-- clocks: Reference to the xclk clock.
-- clock-names: Should be "xclk".
-- clock-frequency: Frequency of the xclk clock.
-- enable-gpios: Chip enable GPIO. Polarity is GPIO_ACTIVE_HIGH. This corresponds
-  to the hardware pin XSHUTDOWN which is physically active low.
-- vdddo-supply: Chip digital IO regulator.
-- vdda-supply: Chip analog regulator.
-- vddd-supply: Chip digital core regulator.
-
-The device node shall contain one 'port' child node with a single 'endpoint'
-subnode for its digital output video port, in accordance with the video
-interface bindings defined in
-Documentation/devicetree/bindings/media/video-interfaces.txt.
-
-Example:
-
-       &i2c1 {
-               ...
-
-               ov7251: camera-sensor@60 {
-                       compatible = "ovti,ov7251";
-                       reg = <0x60>;
-
-                       enable-gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>;
-                       pinctrl-names = "default";
-                       pinctrl-0 = <&camera_bw_default>;
-
-                       clocks = <&clks 200>;
-                       clock-names = "xclk";
-                       clock-frequency = <24000000>;
-
-                       vdddo-supply = <&camera_dovdd_1v8>;
-                       vdda-supply = <&camera_avdd_2v8>;
-                       vddd-supply = <&camera_dvdd_1v2>;
-
-                       port {
-                               ov7251_ep: endpoint {
-                                       clock-lanes = <1>;
-                                       data-lanes = <0>;
-                                       remote-endpoint = <&csi0_ep>;
-                               };
-                       };
-               };
-       };
index 359dc08..6829a4a 100644 (file)
@@ -5,26 +5,41 @@
 $id: http://devicetree.org/schemas/media/i2c/ovti,ov5693.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Omnivision OV5693 CMOS Sensor
+title: Omnivision OV5693/OV5695 CMOS Sensors
 
 maintainers:
   - Tommaso Merciai <tommaso.merciai@amarulasolutions.com>
 
 description: |
-  The Omnivision OV5693 is a high performance, 1/4-inch, 5 megapixel, CMOS
-  image sensor that delivers 2592x1944 at 30fps. It provides full-frame,
+  The Omnivision OV5693/OV5695 are high performance, 1/4-inch, 5 megapixel, CMOS
+  image sensors that deliver 2592x1944 at 30fps. They provide full-frame,
   sub-sampled, and windowed 10-bit MIPI images in various formats via the
   Serial Camera Control Bus (SCCB) interface.
 
-  OV5693 is controlled via I2C and two-wire Serial Camera Control Bus (SCCB).
-  The sensor output is available via CSI-2 serial data output (up to 2-lane).
+  OV5693/OV5695 are controlled via I2C and two-wire Serial Camera Control Bus
+  (SCCB). The sensor output is available via CSI-2 serial data output (up to
+  2-lane).
 
 allOf:
   - $ref: /schemas/media/video-interface-devices.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ovti,ov5693
+    then:
+      properties:
+        port:
+          properties:
+            endpoint:
+              required:
+                - link-frequencies
 
 properties:
   compatible:
-    const: ovti,ov5693
+    enum:
+      - ovti,ov5693
+      - ovti,ov5695
 
   reg:
     maxItems: 1
@@ -34,6 +49,9 @@ properties:
       System input clock (aka XVCLK). From 6 to 27 MHz.
     maxItems: 1
 
+  clock-names:
+    const: xvclk
+
   dovdd-supply:
     description:
       Digital I/O voltage supply, 1.8V.
@@ -72,7 +90,6 @@ properties:
 
         required:
           - data-lanes
-          - link-frequencies
 
 required:
   - compatible
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov7251.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov7251.yaml
new file mode 100644 (file)
index 0000000..2e5187a
--- /dev/null
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/i2c/ovti,ov7251.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: OmniVision OV7251 Image Sensor
+
+description:
+  The Omnivision OV7251 is a 1/7.5-Inch CMOS active pixel digital image sensor
+  with an active array size of 640H x 480V. It is programmable through a serial
+  I2C interface.
+
+maintainers:
+  - Todor Tomov <todor.too@gmail.com>
+
+properties:
+  compatible:
+    const: ovti,ov7251
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    description: XCLK Input Clock
+
+  clock-names:
+    const: xclk
+
+  clock-frequency:
+    description: Frequency of the xclk clock in Hz.
+
+  vdda-supply:
+    description: Analog voltage supply, 2.8 volts
+
+  vddd-supply:
+    description: Digital core voltage supply, 1.2 volts
+
+  vdddo-supply:
+    description: Digital I/O voltage supply, 1.8 volts
+
+  enable-gpios:
+    maxItems: 1
+    description:
+      Reference to the GPIO connected to the XSHUTDOWN pin, if any. Polarity
+      is GPIO_ACTIVE_HIGH.
+
+  port:
+    description: Digital Output Port
+    $ref: /schemas/graph.yaml#/$defs/port-base
+    additionalProperties: false
+
+    properties:
+      endpoint:
+        $ref: /schemas/media/video-interfaces.yaml#
+        unevaluatedProperties: false
+
+        properties:
+          clock-lanes:
+            maximum: 1
+
+          data-lanes:
+            maxItems: 1
+
+          link-frequencies: true
+
+        required:
+          - data-lanes
+          - link-frequencies
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - vdddo-supply
+  - vdda-supply
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        camera@3c {
+            compatible = "ovti,ov7251";
+            reg = <0x3c>;
+            clocks = <&clks 1>;
+            clock-frequency = <24000000>;
+            vdddo-supply = <&ov7251_vdddo_1v8>;
+            vdda-supply = <&ov7251_vdda_2v8>;
+            vddd-supply = <&ov7251_vddd_1v5>;
+            enable-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>;
+
+            port {
+                ov7251_ep: endpoint {
+                    remote-endpoint = <&csi0_ep>;
+                    clock-lanes = <1>;
+                    data-lanes = <0>;
+                    link-frequencies = /bits/ 64 <240000000 319200000>;
+                };
+            };
+        };
+    };
+...
index 0bad7e6..e466dff 100644 (file)
@@ -199,6 +199,7 @@ examples:
             wcam: camera@36 {
                 compatible = "ovti,ov5695";
                 reg = <0x36>;
+                clocks = <&cru SCLK_TESTCLKOUT1>;
 
                 port {
                     wcam_out: endpoint {
index cf94176..8789e36 100644 (file)
@@ -34,6 +34,9 @@ patternProperties:
           - allwinner,sun6i-a31-clock-reset
           - fixed-factor-clock
 
+    required:
+      - compatible
+
     allOf:
       - if:
           properties:
@@ -55,25 +58,17 @@ patternProperties:
             "#clock-cells":
               const: 0
 
-            # Already checked in the main schema
-            compatible: true
-
             clocks:
               maxItems: 2
 
             clock-output-names:
               maxItems: 1
 
-            phandle: true
-
           required:
             - "#clock-cells"
-            - compatible
             - clocks
             - clock-output-names
 
-          additionalProperties: false
-
       - if:
           properties:
             compatible:
@@ -85,25 +80,17 @@ patternProperties:
             "#clock-cells":
               const: 0
 
-            # Already checked in the main schema
-            compatible: true
-
             clocks:
               maxItems: 1
 
             clock-output-names:
               maxItems: 1
 
-            phandle: true
-
           required:
             - "#clock-cells"
-            - compatible
             - clocks
             - clock-output-names
 
-          additionalProperties: false
-
       - if:
           properties:
             compatible:
@@ -119,9 +106,6 @@ patternProperties:
                 offset of the bit controlling this particular gate in
                 the register.
 
-            # Already checked in the main schema
-            compatible: true
-
             clocks:
               maxItems: 1
 
@@ -129,16 +113,11 @@ patternProperties:
               minItems: 1
               maxItems: 32
 
-            phandle: true
-
           required:
             - "#clock-cells"
-            - compatible
             - clocks
             - clock-output-names
 
-          additionalProperties: false
-
       - if:
           properties:
             compatible:
@@ -150,9 +129,6 @@ patternProperties:
             "#clock-cells":
               const: 0
 
-            # Already checked in the main schema
-            compatible: true
-
             clocks:
               maxItems: 4
               description: >
@@ -162,16 +138,11 @@ patternProperties:
             clock-output-names:
               maxItems: 1
 
-            phandle: true
-
           required:
             - "#clock-cells"
-            - compatible
             - clocks
             - clock-output-names
 
-          additionalProperties: false
-
       - if:
           properties:
             compatible:
@@ -183,16 +154,8 @@ patternProperties:
             "#reset-cells":
               const: 1
 
-            # Already checked in the main schema
-            compatible: true
-
-            phandle: true
-
           required:
             - "#reset-cells"
-            - compatible
-
-          additionalProperties: false
 
 required:
   - compatible
index 16c80a7..e51f855 100644 (file)
@@ -57,25 +57,17 @@ patternProperties:
             "#clock-cells":
               const: 0
 
-            # Already checked in the main schema
-            compatible: true
-
             clocks:
               maxItems: 1
 
             clock-output-names:
               maxItems: 1
 
-            phandle: true
-
           required:
             - "#clock-cells"
-            - compatible
             - clocks
             - clock-output-names
 
-          additionalProperties: false
-
       - if:
           properties:
             compatible:
@@ -91,9 +83,6 @@ patternProperties:
                 offset of the bit controlling this particular gate in
                 the register.
 
-            # Already checked in the main schema
-            compatible: true
-
             clocks:
               maxItems: 1
 
@@ -101,16 +90,11 @@ patternProperties:
               minItems: 1
               maxItems: 32
 
-            phandle: true
-
           required:
             - "#clock-cells"
-            - compatible
             - clocks
             - clock-output-names
 
-          additionalProperties: false
-
       - if:
           properties:
             compatible:
@@ -122,34 +106,8 @@ patternProperties:
             "#reset-cells":
               const: 1
 
-            # Already checked in the main schema
-            compatible: true
-
-            phandle: true
-
           required:
             - "#reset-cells"
-            - compatible
-
-          additionalProperties: false
-
-      - if:
-          properties:
-            compatible:
-              contains:
-                const: allwinner,sun8i-a23-codec-analog
-
-        then:
-          properties:
-            # Already checked in the main schema
-            compatible: true
-
-            phandle: true
-
-          required:
-            - compatible
-
-          additionalProperties: false
 
 required:
   - compatible
index 9d83753..af692e8 100644 (file)
@@ -6,6 +6,7 @@ at boot time according to the device tree.
 
 Required properties:
 - compatible:          Should be "atmel,sama5d2-flexcom"
+                       or "microchip,sam9x7-flexcom", "atmel,sama5d2-flexcom"
 - reg:                 Should be the offset/length value for Flexcom dedicated
                        I/O registers (without USART, TWI or SPI registers).
 - clocks:              Should be the Flexcom peripheral clock from PMC.
index e8c5255..3c989d1 100644 (file)
@@ -6,6 +6,7 @@ Required properties:
 - compatible:          Should be one of the following:
                        "atmel,at91sam9260-gpbr", "syscon"
                        "microchip,sam9x60-gpbr", "syscon"
+                       "microchip,sam9x7-gpbr", "microchip,sam9x60-gpbr", "syscon"
 - reg:                 contains offset/length value of the GPBR memory
                        region.
 
index 5f8880c..7de696e 100644 (file)
@@ -8,6 +8,7 @@ Required properties:
    "atmel,sama5d3-hlcdc"
    "atmel,sama5d4-hlcdc"
    "microchip,sam9x60-hlcdc"
+   "microchip,sam9x75-xlcdc"
  - reg: base address and size of the HLCDC device registers.
  - clock-names: the name of the 3 clocks requested by the HLCDC device.
    Should contain "periph_clk", "sys_clk" and "slow_clk".
index 89d05c6..6e5f836 100644 (file)
@@ -14,6 +14,7 @@ Required properties:
                        "atmel,at91sam9x5-matrix", "syscon"
                        "atmel,sama5d3-matrix", "syscon"
                        "microchip,sam9x60-matrix", "syscon"
+                       "microchip,sam9x7-matrix", "atmel,at91sam9x5-matrix", "syscon"
 - reg:                 Contains offset/length value of the Bus Matrix
                        memory region.
 
index 5696d9f..fd62add 100644 (file)
@@ -10,6 +10,7 @@ Required properties:
                        "atmel,sama5d3-smc", "syscon"
                        "atmel,sama5d2-smc", "syscon"
                        "microchip,sam9x60-smc", "syscon"
+                       "microchip,sam9x7-smc", "atmel,at91sam9260-smc", "syscon"
 - reg:                 Contains offset/length value of the SMC memory
                        region.
 
index 9f9a14a..cb48016 100644 (file)
@@ -35,7 +35,7 @@ patternProperties:
   "^gpio@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../gpio/brcm,bcm63xx-gpio.yaml"
+    $ref: /schemas/gpio/brcm,bcm63xx-gpio.yaml
     description:
       GPIO controller for the SoC GPIOs. This child node definition
       should follow the bindings specified in
@@ -44,7 +44,7 @@ patternProperties:
   "^pinctrl@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../pinctrl/brcm,bcm6318-pinctrl.yaml"
+    $ref: /schemas/pinctrl/brcm,bcm6318-pinctrl.yaml
     description:
       Pin controller for the SoC pins. This child node definition
       should follow the bindings specified in
index 803277d..c14def1 100644 (file)
@@ -35,7 +35,7 @@ patternProperties:
   "^gpio@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../gpio/brcm,bcm63xx-gpio.yaml"
+    $ref: /schemas/gpio/brcm,bcm63xx-gpio.yaml
     description:
       GPIO controller for the SoC GPIOs. This child node definition
       should follow the bindings specified in
@@ -44,7 +44,7 @@ patternProperties:
   "^pinctrl@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../pinctrl/brcm,bcm63268-pinctrl.yaml"
+    $ref: /schemas/pinctrl/brcm,bcm63268-pinctrl.yaml
     description:
       Pin controller for the SoC pins. This child node definition
       should follow the bindings specified in
index b9a6856..5f48209 100644 (file)
@@ -35,7 +35,7 @@ patternProperties:
   "^gpio@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../gpio/brcm,bcm63xx-gpio.yaml"
+    $ref: /schemas/gpio/brcm,bcm63xx-gpio.yaml
     description:
       GPIO controller for the SoC GPIOs. This child node definition
       should follow the bindings specified in
@@ -44,7 +44,7 @@ patternProperties:
   "^pinctrl@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../pinctrl/brcm,bcm6328-pinctrl.yaml"
+    $ref: /schemas/pinctrl/brcm,bcm6328-pinctrl.yaml
     description:
       Pin controller for the SoC pins. This child node definition
       should follow the bindings specified in
index 4651fe4..f1f4629 100644 (file)
@@ -35,7 +35,7 @@ patternProperties:
   "^gpio@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../gpio/brcm,bcm63xx-gpio.yaml"
+    $ref: /schemas/gpio/brcm,bcm63xx-gpio.yaml
     description:
       GPIO controller for the SoC GPIOs. This child node definition
       should follow the bindings specified in
@@ -44,7 +44,7 @@ patternProperties:
   "^pinctrl@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../pinctrl/brcm,bcm6358-pinctrl.yaml"
+    $ref: /schemas/pinctrl/brcm,bcm6358-pinctrl.yaml
     description:
       Pin controller for the SoC pins. This child node definition
       should follow the bindings specified in
index 0330b62..4d59473 100644 (file)
@@ -35,7 +35,7 @@ patternProperties:
   "^gpio@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../gpio/brcm,bcm63xx-gpio.yaml"
+    $ref: /schemas/gpio/brcm,bcm63xx-gpio.yaml
     description:
       GPIO controller for the SoC GPIOs. This child node definition
       should follow the bindings specified in
@@ -44,7 +44,7 @@ patternProperties:
   "^pinctrl@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../pinctrl/brcm,bcm6362-pinctrl.yaml"
+    $ref: /schemas/pinctrl/brcm,bcm6362-pinctrl.yaml
     description:
       Pin controller for the SoC pins. This child node definition
       should follow the bindings specified in
index 82d3e44..aae83d4 100644 (file)
@@ -35,7 +35,7 @@ patternProperties:
   "^gpio@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../gpio/brcm,bcm63xx-gpio.yaml"
+    $ref: /schemas/gpio/brcm,bcm63xx-gpio.yaml
     description:
       GPIO controller for the SoC GPIOs. This child node definition
       should follow the bindings specified in
@@ -44,7 +44,7 @@ patternProperties:
   "^pinctrl@[0-9a-f]+$":
     # Child node
     type: object
-    $ref: "../pinctrl/brcm,bcm6368-pinctrl.yaml"
+    $ref: /schemas/pinctrl/brcm,bcm6368-pinctrl.yaml
     description:
       Pin controller for the SoC pins. This child node definition
       should follow the bindings specified in
index 1b06a77..9804d13 100644 (file)
@@ -31,12 +31,17 @@ properties:
   charger:
     $ref: /schemas/power/supply/maxim,max77693.yaml
 
+  connector:
+    $ref: /schemas/connector/usb-connector.yaml#
+    unevaluatedProperties: false
+
   led:
     $ref: /schemas/leds/maxim,max77693.yaml
 
   max77693-muic:
     type: object
     additionalProperties: false
+    deprecated: true
 
     properties:
       compatible:
@@ -45,6 +50,21 @@ properties:
     required:
       - compatible
 
+  muic:
+    type: object
+    additionalProperties: false
+
+    properties:
+      compatible:
+        const: maxim,max77693-muic
+
+      connector:
+        $ref: /schemas/connector/usb-connector.yaml#
+        unevaluatedProperties: false
+
+    required:
+      - compatible
+
   motor-driver:
     type: object
     additionalProperties: false
@@ -107,6 +127,38 @@ examples:
                 };
             };
 
+            muic {
+                compatible = "maxim,max77693-muic";
+
+                connector {
+                    compatible = "samsung,usb-connector-11pin",
+                                 "usb-b-connector";
+                    label = "micro-USB";
+                    type = "micro";
+
+                    ports {
+                        #address-cells = <1>;
+                        #size-cells = <0>;
+
+                        port@0 {
+                            reg = <0>;
+
+                            muic_to_usb: endpoint {
+                                remote-endpoint = <&usb_to_muic>;
+                            };
+                        };
+
+                        port@3 {
+                            reg = <3>;
+
+                            muic_to_mhl: endpoint {
+                                remote-endpoint = <&mhl_to_muic>;
+                            };
+                        };
+                    };
+                };
+            };
+
             motor-driver {
                 compatible = "maxim,max77693-haptic";
                 haptic-supply = <&ldo26_reg>;
index 8b9a200..9f03436 100644 (file)
@@ -41,6 +41,7 @@ properties:
           - qcom,pm660
           - qcom,pm660l
           - qcom,pm7250b
+          - qcom,pm7550ba
           - qcom,pm7325
           - qcom,pm8004
           - qcom,pm8005
@@ -70,6 +71,8 @@ properties:
           - qcom,pm8994
           - qcom,pm8998
           - qcom,pma8084
+          - qcom,pmc8180
+          - qcom,pmc8180c
           - qcom,pmd9635
           - qcom,pmi632
           - qcom,pmi8950
@@ -88,6 +91,7 @@ properties:
           - qcom,pms405
           - qcom,pmx55
           - qcom,pmx65
+          - qcom,pmx75
           - qcom,smb2351
       - const: qcom,spmi-pmic
 
@@ -127,7 +131,7 @@ patternProperties:
 
   "^audio-codec@[0-9a-f]+$":
     type: object
-    additionalProperties: true # FIXME qcom,pm8916-wcd-analog-codec binding not converted yet
+    $ref: /schemas/sound/qcom,pm8916-wcd-analog-codec.yaml#
 
   "^charger@[0-9a-f]+$":
     type: object
index 7ab7b2c..d783cc4 100644 (file)
@@ -130,7 +130,6 @@ dependencies:
 examples:
   - |
     #include <dt-bindings/interrupt-controller/irq.h>
-    #include <dt-bindings/leds/common.h>
 
     i2c {
         #address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/mfd/st,stmpe.yaml b/Documentation/devicetree/bindings/mfd/st,stmpe.yaml
new file mode 100644 (file)
index 0000000..b77cc3f
--- /dev/null
@@ -0,0 +1,297 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/st,stmpe.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics Port Expander (STMPE)
+
+description: STMicroelectronics Port Expander (STMPE) is a series of slow
+  bus controllers for various expanded peripherals such as GPIO, keypad,
+  touchscreen, ADC, PWM or rotator. It can contain one or several different
+  peripherals connected to SPI or I2C.
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+properties:
+  compatible:
+    enum:
+      - st,stmpe610
+      - st,stmpe801
+      - st,stmpe811
+      - st,stmpe1600
+      - st,stmpe1601
+      - st,stmpe2401
+      - st,stmpe2403
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  vcc-supply: true
+
+  vio-supply: true
+
+  reset-gpios:
+    maxItems: 1
+
+  wakeup-source: true
+
+  st,autosleep-timeout:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 4, 16, 32, 64, 128, 256, 512, 1024 ]
+    description: Time idle before going to automatic sleep to save power
+
+  st,sample-time:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 0, 1, 2, 3, 4, 5, 6 ]
+    description: |
+      Sample time per iteration
+      0 = 36 clock ticks
+      1 = 44 clock ticks
+      2 = 56 clock ticks
+      3 = 64 clock ticks
+      4 = 80 clock ticks - recommended
+      5 = 96 clock ticks
+      6 = 124 clock ticks
+
+  st,mod-12b:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 0, 1 ]
+    description: ADC bit mode 0 = 10bit ADC, 1 = 12bit ADC
+
+  st,ref-sel:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 0, 1 ]
+    description: ADC reference source 0 = internal, 1 = external
+
+  st,adc-freq:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 0, 1, 2, 3 ]
+    description: |
+      ADC clock speed
+      0 = 1.625 MHz
+      1 = 3.25 MHz
+      2, 3 = 6.5 MHz
+
+  adc:
+    type: object
+    $ref: /schemas/iio/adc/st,stmpe-adc.yaml#
+
+  gpio:
+    type: object
+    $ref: /schemas/gpio/st,stmpe-gpio.yaml#
+
+  keyboard-controller:
+    type: object
+    $ref: /schemas/input/matrix-keymap.yaml#
+
+    unevaluatedProperties: false
+
+    properties:
+      compatible:
+        const: st,stmpe-keypad
+
+      debounce-interval:
+        description: Debouncing interval in milliseconds
+        $ref: /schemas/types.yaml#/definitions/uint32
+
+      st,no-autorepeat:
+        description: If present, the keys will not autorepeat when pressed
+        $ref: /schemas/types.yaml#/definitions/flag
+
+      st,scan-count:
+        description: Scanning cycles elapsed before key data is updated
+        $ref: /schemas/types.yaml#/definitions/uint32
+
+    required:
+      - compatible
+      - linux,keymap
+
+  pwm:
+    type: object
+    $ref: /schemas/pwm/pwm.yaml#
+
+    unevaluatedProperties: false
+
+    properties:
+      compatible:
+        const: st,stmpe-pwm
+
+      "#pwm-cells":
+        const: 2
+
+  touchscreen:
+    type: object
+    $ref: /schemas/input/touchscreen/touchscreen.yaml#
+
+    unevaluatedProperties: false
+
+    properties:
+      compatible:
+        const: st,stmpe-ts
+
+      st,ave-ctrl:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [ 0, 1, 2, 3 ]
+        description: |
+          Sample average control
+          0 = 1 sample
+          1 = 2 samples
+          2 = 4 samples
+          3 = 8 samples
+
+      st,touch-det-delay:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [ 0, 1, 2, 3, 4, 5, 6, 7 ]
+        description: |
+          Touch detection delay
+          0 = 10 us
+          1 = 50 us
+          2 = 100 us
+          3 = 500 us - recommended
+          4 = 1 ms
+          5 = 5 ms
+          6 = 10 ms
+          7 = 50 ms
+
+      st,settling:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [ 0, 1, 2, 3, 4, 5, 6, 7 ]
+        description: |
+          Panel driver settling time
+          0 = 10 us
+          1 = 100 us
+          2 = 500 us - recommended
+          3 = 1 ms
+          4 = 5 ms
+          5 = 10 ms
+          6 = 50 ms
+          7 = 100 ms
+
+      st,fraction-z:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [ 0, 1, 2, 3, 4, 5, 6, 7 ]
+        description: Length of the fractional part in z, recommended is 7
+          (fraction-z ([0..7]) = Count of the fractional part)
+
+      st,i-drive:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [ 0, 1 ]
+        description: |
+          Current limit value of the touchscreen drivers
+          0 = 20 mA (typical 35 mA max)
+          1 = 50 mA (typical 80 mA max)
+
+    required:
+      - compatible
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/input/input.h>
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      port-expander@43 {
+        compatible = "st,stmpe2401";
+        reg = <0x43>;
+        reset-gpios = <&gpio 13 GPIO_ACTIVE_LOW>;
+        interrupts = <26 IRQ_TYPE_EDGE_FALLING>;
+        interrupt-parent = <&gpio>;
+        vcc-supply = <&db8500_vsmps2_reg>;
+        vio-supply = <&db8500_vsmps2_reg>;
+        wakeup-source;
+        st,autosleep-timeout = <1024>;
+
+        gpio {
+          compatible = "st,stmpe-gpio";
+          gpio-controller;
+          #gpio-cells = <2>;
+          interrupt-controller;
+          #interrupt-cells = <2>;
+          st,norequest-mask = <0xf0f002>;
+        };
+
+        keyboard-controller {
+          compatible = "st,stmpe-keypad";
+          debounce-interval = <64>;
+          st,scan-count = <8>;
+          st,no-autorepeat;
+          keypad,num-rows = <8>;
+          keypad,num-columns = <8>;
+          linux,keymap = <
+              MATRIX_KEY(0x00, 0x00, KEY_1)
+              MATRIX_KEY(0x00, 0x01, KEY_2)
+              MATRIX_KEY(0x00, 0x02, KEY_3)
+              MATRIX_KEY(0x00, 0x03, KEY_4)
+              MATRIX_KEY(0x00, 0x04, KEY_5)
+              MATRIX_KEY(0x00, 0x05, KEY_6)
+              MATRIX_KEY(0x00, 0x06, KEY_7)
+              MATRIX_KEY(0x00, 0x07, KEY_8)
+              MATRIX_KEY(0x00, 0x08, KEY_9)
+              MATRIX_KEY(0x00, 0x09, KEY_0)
+          >;
+        };
+
+        pwm {
+          compatible = "st,stmpe-pwm";
+          #pwm-cells = <2>;
+        };
+      };
+
+      port-expander@41 {
+        compatible = "st,stmpe811";
+        reg = <0x41>;
+        interrupts = <10 IRQ_TYPE_LEVEL_LOW>;
+        interrupt-parent = <&gpio>;
+        st,adc-freq = <1>;
+        st,mod-12b = <1>;
+        st,ref-sel = <0>;
+        st,sample-time = <4>;
+
+        adc {
+          compatible = "st,stmpe-adc";
+          st,norequest-mask = <0x0f>;
+          #io-channel-cells = <1>;
+        };
+
+        gpio {
+          compatible = "st,stmpe-gpio";
+          gpio-controller;
+          #gpio-cells = <2>;
+          interrupt-controller;
+          #interrupt-cells = <2>;
+        };
+
+        pwm {
+          compatible = "st,stmpe-pwm";
+          #pwm-cells = <2>;
+        };
+
+        touchscreen {
+          compatible = "st,stmpe-ts";
+          st,ave-ctrl = <3>;
+          st,touch-det-delay = <5>;
+          st,settling = <3>;
+          st,fraction-z = <7>;
+          st,i-drive = <1>;
+        };
+      };
+    };
+...
index 97c6109..b17ebeb 100644 (file)
@@ -106,6 +106,7 @@ properties:
         const: st,stpmic1-regulators
 
       ldo3:
+        $ref: /schemas/regulator/regulator.yaml
         type: object
 
         properties:
@@ -128,6 +129,7 @@ properties:
         additionalProperties: false
 
       ldo4:
+        $ref: /schemas/regulator/regulator.yaml
         type: object
 
         properties:
@@ -142,11 +144,14 @@ properties:
           regulator-name: true
           regulator-boot-on: true
           regulator-always-on: true
+          regulator-min-microvolt: true
+          regulator-max-microvolt: true
           regulator-over-current-protection: true
 
         additionalProperties: false
 
       vref_ddr:
+        $ref: /schemas/regulator/regulator.yaml
         type: object
 
         properties:
@@ -165,6 +170,7 @@ properties:
         additionalProperties: false
 
       boost:
+        $ref: /schemas/regulator/regulator.yaml
         type: object
 
         properties:
@@ -187,10 +193,8 @@ properties:
       "^(buck[1-4]|ldo[1-6]|vref_ddr|boost|pwr_sw[1-2])-supply$":
         description: STPMIC1 voltage regulators supplies
 
-      "^(buck[1-4]|ldo[1-6]|boost|vref_ddr|pwr_sw[1-2])$":
-        $ref: ../regulator/regulator.yaml
-
       "^ldo[1-2,5-6]$":
+        $ref: /schemas/regulator/regulator.yaml
         type: object
 
         properties:
@@ -213,6 +217,7 @@ properties:
         additionalProperties: false
 
       "^buck[1-4]$":
+        $ref: /schemas/regulator/regulator.yaml
         type: object
 
         properties:
@@ -237,6 +242,7 @@ properties:
         additionalProperties: false
 
       "^pwr_sw[1-2]$":
+        $ref: /schemas/regulator/regulator.yaml
         type: object
 
         properties:
index a66d58b..5e0002f 100644 (file)
@@ -72,44 +72,52 @@ properties:
           main voltage domain for the chip.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_varm:
         description: The voltage for the ARM Cortex A-9 CPU.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_vmodem:
         description: The voltage for the modem subsystem.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_vpll:
         description: The voltage for the phase locked loop clocks.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_vsmps1:
         description: Also known as VIO12, is a step-down voltage regulator
           for 1.2V I/O. SMPS means System Management Power Source.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_vsmps2:
         description: Also known as VIO18, is a step-down voltage regulator
           for 1.8V I/O. SMPS means System Management Power Source.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_vsmps3:
         description: This is a step-down voltage regulator
           for 0.87 thru 1.875V I/O. SMPS means System Management Power Source.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_vrf1:
         description: RF transceiver voltage regulator.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_sva_mmdsp:
         description: Smart Video Accelerator (SVA) multimedia DSP (MMDSP)
@@ -117,18 +125,21 @@ properties:
           for video encoding and decoding.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_sva_mmdsp_ret:
         description: Smart Video Accelerator (SVA) multimedia DSP (MMDSP)
           voltage regulator for retention mode.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_sva_pipe:
         description: Smart Video Accelerator (SVA) multimedia DSP (MMDSP)
           voltage regulator for the data pipe.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_sia_mmdsp:
         description: Smart Image Accelerator (SIA) multimedia DSP (MMDSP)
@@ -136,18 +147,21 @@ properties:
           for image encoding and decoding.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_sia_mmdsp_ret:
         description: Smart Image Accelerator (SIA) multimedia DSP (MMDSP)
           voltage regulator for retention mode.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_sia_pipe:
         description: Smart Image Accelerator (SIA) multimedia DSP (MMDSP)
           voltage regulator for the data pipe.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_sga:
         description: Smart Graphics Accelerator (SGA) voltage regulator.
@@ -155,6 +169,7 @@ properties:
           accelerator block.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_b2r2_mcde:
         description: Blit Blend Rotate and Rescale (B2R2), and Multi-Channel
@@ -162,28 +177,33 @@ properties:
           blocks.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_esram12:
         description: Embedded Static RAM (ESRAM) 1 and 2 voltage regulator.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_esram12_ret:
         description: Embedded Static RAM (ESRAM) 1 and 2 voltage regulator for
           retention mode.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_esram34:
         description: Embedded Static RAM (ESRAM) 3 and 4 voltage regulator.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
       db8500_esram34_ret:
         description: Embedded Static RAM (ESRAM) 3 and 4 voltage regulator for
           retention mode.
         type: object
         $ref: ../regulator/regulator.yaml#
+        unevaluatedProperties: false
 
     required:
       - compatible
diff --git a/Documentation/devicetree/bindings/mfd/stmpe.txt b/Documentation/devicetree/bindings/mfd/stmpe.txt
deleted file mode 100644 (file)
index d4408a4..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-* ST Microelectronics STMPE Multi-Functional Device
-
-STMPE is an MFD device which may expose the following inbuilt devices: gpio,
-keypad, touchscreen, adc, pwm, rotator.
-
-Required properties:
- - compatible                  : "st,stmpe[610|801|811|1600|1601|2401|2403]"
- - reg                         : I2C/SPI address of the device
-
-Optional properties:
- - interrupts                  : The interrupt outputs from the controller
- - interrupt-controller                : Marks the device node as an interrupt controller
- - wakeup-source               : Marks the input device as wakable
- - st,autosleep-timeout                : Valid entries (ms); 4, 16, 32, 64, 128, 256, 512 and 1024
- - irq-gpio                    : If present, which GPIO to use for event IRQ
-
-Optional properties for devices with touch and ADC (STMPE811|STMPE610):
- - st,sample-time              : ADC conversion time in number of clock.
-                                       0 -> 36 clocks          4 -> 80 clocks (recommended)
-                                       1 -> 44 clocks          5 -> 96 clocks
-                                       2 -> 56 clocks          6 -> 124 clocks
-                                       3 -> 64 clocks
- - st,mod-12b                  : ADC Bit mode
-                                       0 -> 10bit ADC          1 -> 12bit ADC
- - st,ref-sel                  : ADC reference source
-                                       0 -> internal           1 -> external
- - st,adc-freq                 : ADC Clock speed
-                                       0 -> 1.625 MHz          2 || 3 -> 6.5 MHz
-                                       1 -> 3.25 MHz
-
-Example:
-
-       stmpe1601: stmpe1601@40 {
-               compatible = "st,stmpe1601";
-               reg = <0x40>;
-               interrupts = <26 0x4>;
-               interrupt-parent = <&gpio6>;
-               interrupt-controller;
-
-               wakeup-source;
-               st,autosleep-timeout = <1024>;
-       };
diff --git a/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt b/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt
deleted file mode 100644 (file)
index c42eecf..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-Broadcom Kona PWM controller device tree bindings
-
-This controller has 6 channels.
-
-Required Properties :
-- compatible: should contain "brcm,kona-pwm"
-- reg: physical base address and length of the controller's registers
-- clocks: phandle + clock specifier pair for the external clock
-- #pwm-cells: Should be 3. See pwm.yaml in this directory for a
-  description of the cells format.
-
-Refer to clocks/clock-bindings.txt for generic clock consumer properties.
-
-Example:
-
-pwm: pwm@3e01a000 {
-       compatible = "brcm,bcm11351-pwm", "brcm,kona-pwm";
-       reg = <0x3e01a000 0xc4>;
-       clocks = <&pwm_clk>;
-       #pwm-cells = <3>;
-};
diff --git a/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.yaml b/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.yaml
new file mode 100644 (file)
index 0000000..e86c805
--- /dev/null
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/brcm,kona-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Kona family PWM controller
+
+description:
+  This controller has 6 channels.
+
+maintainers:
+  - Florian Fainelli <f.fainelli@gmail.com>
+
+allOf:
+  - $ref: pwm.yaml#
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - brcm,bcm11351-pwm
+      - const: brcm,kona-pwm
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  '#pwm-cells':
+    const: 3
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/bcm281xx.h>
+
+    pwm@3e01a000 {
+       compatible = "brcm,bcm11351-pwm", "brcm,kona-pwm";
+       reg = <0x3e01a000 0xcc>;
+       clocks = <&slave_ccu BCM281XX_SLAVE_CCU_PWM>;
+       #pwm-cells = <3>;
+    };
+...
index 0c3910f..30632ef 100644 (file)
@@ -20,7 +20,9 @@ properties:
       - fsl,imx7ulp-cm4
       - fsl,imx8mm-cm4
       - fsl,imx8mn-cm7
+      - fsl,imx8mn-cm7-mmio
       - fsl,imx8mp-cm7
+      - fsl,imx8mp-cm7-mmio
       - fsl,imx8mq-cm4
       - fsl,imx8qm-cm4
       - fsl,imx8qxp-cm4
@@ -70,6 +72,11 @@ properties:
     description:
       Specify CPU entry address for SCU enabled processor.
 
+  fsl,iomuxc-gpr:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the IOMUXC GPR block, which provides access to the CM7 CPUWAIT bit.
+
   fsl,resource-id:
     $ref: /schemas/types.yaml#/definitions/uint32
     description:
@@ -79,6 +86,19 @@ properties:
 required:
   - compatible
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          not:
+            contains:
+              enum:
+                - fsl,imx8mn-cm7-mmio
+                - fsl,imx8mp-cm7-mmio
+    then:
+      properties:
+        fsl,iomuxc-gpr: false
+
 additionalProperties: false
 
 examples:
index 643ee78..a2b0079 100644 (file)
@@ -26,6 +26,7 @@ properties:
       - qcom,sdm660-adsp-pas
       - qcom,sdm845-adsp-pas
       - qcom,sdm845-cdsp-pas
+      - qcom,sdm845-slpi-pas
 
   reg:
     maxItems: 1
@@ -44,8 +45,13 @@ properties:
     maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
+  firmware-name:
+    maxItems: 1
+    description: Firmware name for the Hexagon core
+
 required:
   - compatible
+  - memory-region
 
 unevaluatedProperties: false
 
@@ -63,6 +69,7 @@ allOf:
               - qcom,msm8998-adsp-pas
               - qcom,sdm845-adsp-pas
               - qcom,sdm845-cdsp-pas
+              - qcom,sdm845-slpi-pas
     then:
       properties:
         clocks:
@@ -104,6 +111,7 @@ allOf:
               - qcom,msm8998-slpi-pas
               - qcom,sdm845-adsp-pas
               - qcom,sdm845-cdsp-pas
+              - qcom,sdm845-slpi-pas
     then:
       properties:
         interrupts:
@@ -160,6 +168,22 @@ allOf:
   - if:
       properties:
         compatible:
+          enum:
+            - qcom,sdm845-slpi-pas
+    then:
+      properties:
+        power-domains:
+          items:
+            - description: LCX power domain
+            - description: LMX power domain
+        power-domain-names:
+          items:
+            - const: lcx
+            - const: lmx
+
+  - if:
+      properties:
+        compatible:
           contains:
             enum:
               - qcom,msm8226-adsp-pil
index c1ac6ca..0643faa 100644 (file)
@@ -19,6 +19,7 @@ properties:
     enum:
       - qcom,msm8996-mss-pil
       - qcom,msm8998-mss-pil
+      - qcom,sdm660-mss-pil
       - qcom,sdm845-mss-pil
 
   reg:
@@ -215,13 +216,12 @@ allOf:
             - description: GCC MSS IFACE clock
             - description: GCC MSS BUS clock
             - description: GCC MSS MEM clock
-            - description: RPMH XO clock
+            - description: RPM XO clock
             - description: GCC MSS GPLL0 clock
             - description: GCC MSS SNOC_AXI clock
             - description: GCC MSS MNOC_AXI clock
-            - description: RPMH PNOC clock
-            - description: GCC MSS PRNG clock
-            - description: RPMH QDSS clock
+            - description: RPM PNOC clock
+            - description: RPM QDSS clock
         clock-names:
           items:
             - const: iface
@@ -245,7 +245,9 @@ allOf:
   - if:
       properties:
         compatible:
-          const: qcom,msm8998-mss-pil
+          enum:
+            - qcom,msm8998-mss-pil
+            - qcom,sdm660-mss-pil
     then:
       properties:
         clocks:
index 171ef85..63a82e7 100644 (file)
@@ -82,7 +82,6 @@ required:
   - clock-names
   - interrupts
   - interrupt-names
-  - memory-region
   - qcom,smem-states
   - qcom,smem-state-names
 
index 5efa0e5..eb868a7 100644 (file)
@@ -42,7 +42,7 @@ properties:
   smd-edge: false
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   firmware-name:
@@ -52,6 +52,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index 5cefd2c..689d5d5 100644 (file)
@@ -51,7 +51,7 @@ properties:
       - const: mss
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   qcom,qmp:
@@ -67,6 +67,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index c1f8dd8..4744a37 100644 (file)
@@ -38,7 +38,7 @@ properties:
   smd-edge: false
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   firmware-name:
@@ -48,6 +48,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index f6fbc53..96d53ba 100644 (file)
@@ -38,7 +38,7 @@ properties:
   smd-edge: false
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   firmware-name:
@@ -48,6 +48,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index c66e298..5d46327 100644 (file)
@@ -46,7 +46,7 @@ properties:
       - const: mss
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   qcom,qmp:
@@ -62,6 +62,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index f5d1fa9..0282872 100644 (file)
@@ -15,10 +15,19 @@ description:
 
 properties:
   compatible:
-    enum:
-      - qcom,sm6115-adsp-pas
-      - qcom,sm6115-cdsp-pas
-      - qcom,sm6115-mpss-pas
+    oneOf:
+      - enum:
+          - qcom,sm6115-adsp-pas
+          - qcom,sm6115-cdsp-pas
+          - qcom,sm6115-mpss-pas
+
+      - items:
+          - const: qcom,qcm2290-adsp-pas
+          - const: qcom,sm6115-adsp-pas
+
+      - items:
+          - const: qcom,qcm2290-mpss-pas
+          - const: qcom,sm6115-mpss-pas
 
   reg:
     maxItems: 1
@@ -32,7 +41,7 @@ properties:
       - const: xo
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   smd-edge: false
@@ -44,15 +53,17 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
   - if:
       properties:
         compatible:
-          enum:
-            - qcom,sm6115-adsp-pas
-            - qcom,sm6115-cdsp-pas
+          contains:
+            enum:
+              - qcom,sm6115-adsp-pas
+              - qcom,sm6115-cdsp-pas
     then:
       properties:
         interrupts:
@@ -69,9 +80,10 @@ allOf:
   - if:
       properties:
         compatible:
-          enum:
-            - qcom,sm6115-cdsp-pas
-            - qcom,sm6115-mpss-pas
+          contains:
+            enum:
+              - qcom,sm6115-cdsp-pas
+              - qcom,sm6115-mpss-pas
     then:
       properties:
         power-domains:
@@ -84,8 +96,9 @@ allOf:
   - if:
       properties:
         compatible:
-          enum:
-            - qcom,sm6115-adsp-pas
+          contains:
+            enum:
+              - qcom,sm6115-adsp-pas
     then:
       properties:
         power-domains:
index fee02fa..f7e40fb 100644 (file)
@@ -36,7 +36,7 @@ properties:
     description: Reference to the AOSS side-channel message RAM.
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   smd-edge: false
@@ -48,6 +48,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index 2c085ac..238c6e5 100644 (file)
@@ -40,7 +40,7 @@ properties:
     description: Reference to the AOSS side-channel message RAM.
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   smd-edge: false
@@ -52,6 +52,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index 9a04d19..53cea8e 100644 (file)
@@ -43,7 +43,7 @@ properties:
   smd-edge: false
 
   memory-region:
-    minItems: 1
+    maxItems: 1
     description: Reference to the reserved-memory for the Hexagon core
 
   firmware-name:
@@ -53,6 +53,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index fe216aa..5812082 100644 (file)
@@ -53,6 +53,7 @@ properties:
 required:
   - compatible
   - reg
+  - memory-region
 
 allOf:
   - $ref: /schemas/remoteproc/qcom,pas-common.yaml#
index 4d2bef1..c8bb2ee 100644 (file)
@@ -14,13 +14,17 @@ maintainers:
 
 properties:
   compatible:
-    enum:
-      - atmel,at91rm9200-rtc
-      - atmel,at91sam9x5-rtc
-      - atmel,sama5d4-rtc
-      - atmel,sama5d2-rtc
-      - microchip,sam9x60-rtc
-      - microchip,sama7g5-rtc
+    oneOf:
+      - enum:
+          - atmel,at91rm9200-rtc
+          - atmel,at91sam9x5-rtc
+          - atmel,sama5d4-rtc
+          - atmel,sama5d2-rtc
+          - microchip,sam9x60-rtc
+          - microchip,sama7g5-rtc
+      - items:
+          - const: microchip,sam9x7-rtc
+          - const: microchip,sam9x60-rtc
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/intersil,isl12022.yaml b/Documentation/devicetree/bindings/rtc/intersil,isl12022.yaml
new file mode 100644 (file)
index 0000000..c2d1441
--- /dev/null
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/intersil,isl12022.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intersil ISL12022 Real-time Clock
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+properties:
+  compatible:
+    const: isil,isl12022
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 0
+
+  isil,battery-trip-levels-microvolt:
+    description:
+      The battery voltages at which the first alarm and second alarm
+      should trigger (normally ~85% and ~75% of nominal V_BAT).
+    items:
+      - enum: [2125000, 2295000, 2550000, 2805000, 3060000, 4250000, 4675000]
+      - enum: [1875000, 2025000, 2250000, 2475000, 2700000, 3750000, 4125000]
+
+required:
+  - compatible
+  - reg
+
+allOf:
+  - $ref: rtc.yaml#
+  # If #clock-cells is present, interrupts must not be present
+  - if:
+      required:
+        - '#clock-cells'
+    then:
+      properties:
+        interrupts: false
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        rtc@6f {
+            compatible = "isil,isl12022";
+            reg = <0x6f>;
+            interrupts-extended = <&gpio1 5 IRQ_TYPE_LEVEL_LOW>;
+            isil,battery-trip-levels-microvolt = <2550000>, <2250000>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt b/Documentation/devicetree/bindings/rtc/maxim,ds3231.txt
deleted file mode 100644 (file)
index 85be53a..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-* Maxim DS3231 Real Time Clock
-
-Required properties:
-- compatible: Should contain "maxim,ds3231".
-- reg: I2C address for chip.
-
-Optional property:
-- #clock-cells: Should be 1.
-- clock-output-names:
-  overwrite the default clock names "ds3231_clk_sqw" and "ds3231_clk_32khz".
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Following indices are allowed:
-    - 0: square-wave output on the SQW pin
-    - 1: square-wave output on the 32kHz pin
-
-- interrupts: rtc alarm/event interrupt. When this property is selected,
-  clock on the SQW pin cannot be used.
-
-Example:
-
-ds3231: ds3231@51 {
-       compatible = "maxim,ds3231";
-       reg = <0x68>;
-       #clock-cells = <1>;
-};
-
-device1 {
-...
-       clocks = <&ds3231 0>;
-...
-};
-
-device2 {
-...
-       clocks = <&ds3231 1>;
-...
-};
index bcb2300..2d9fe5a 100644 (file)
@@ -18,6 +18,7 @@ properties:
       - nxp,pca2129
       - nxp,pcf2127
       - nxp,pcf2129
+      - nxp,pcf2131
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/rtc/st,m48t86.yaml b/Documentation/devicetree/bindings/rtc/st,m48t86.yaml
new file mode 100644 (file)
index 0000000..e3e12fa
--- /dev/null
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/st,m48t86.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ST M48T86 / Dallas DS12887 RTC with SRAM
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+allOf:
+  - $ref: rtc.yaml
+
+properties:
+  compatible:
+    enum:
+      - st,m48t86
+
+  reg:
+    items:
+      - description: index register
+      - description: data register
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    rtc@10800000 {
+      compatible = "st,m48t86";
+      reg = <0x10800000 0x1>, <0x11700000 0x1>;
+    };
+
+...
index 9af77f2..2a65f31 100644 (file)
@@ -45,8 +45,6 @@ properties:
       - isil,isl1208
       # Intersil ISL1218 Low Power RTC with Battery Backed SRAM
       - isil,isl1218
-      # Intersil ISL12022 Real-time Clock
-      - isil,isl12022
       # Real Time Clock Module with I2C-Bus
       - microcrystal,rv3029
       # Real Time Clock
index bdde68a..a680d7a 100644 (file)
@@ -14,7 +14,13 @@ properties:
     pattern: "^easrc@.*"
 
   compatible:
-    const: fsl,imx8mn-easrc
+    oneOf:
+      - enum:
+          - fsl,imx8mn-easrc
+      - items:
+          - enum:
+              - fsl,imx8mp-easrc
+          - const: fsl,imx8mn-easrc
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/thermal/loongson,ls2k-thermal.yaml b/Documentation/devicetree/bindings/thermal/loongson,ls2k-thermal.yaml
new file mode 100644 (file)
index 0000000..7538469
--- /dev/null
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/loongson,ls2k-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Thermal sensors on Loongson-2 SoCs
+
+maintainers:
+  - zhanghongchen <zhanghongchen@loongson.cn>
+  - Yinbo Zhu <zhuyinbo@loongson.cn>
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - loongson,ls2k1000-thermal
+      - items:
+          - enum:
+              - loongson,ls2k2000-thermal
+          - const: loongson,ls2k1000-thermal
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    thermal: thermal-sensor@1fe01500 {
+        compatible = "loongson,ls2k1000-thermal";
+        reg = <0x1fe01500 0x30>;
+        interrupt-parent = <&liointc0>;
+        interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
+    };
index f5cc7aa..443e2e7 100644 (file)
@@ -17,6 +17,7 @@ properties:
   compatible:
     enum:
       - amlogic,meson-gxbb-wdt
+      - amlogic,t7-wdt
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/watchdog/marvell,cn10624-wdt.yaml b/Documentation/devicetree/bindings/watchdog/marvell,cn10624-wdt.yaml
new file mode 100644 (file)
index 0000000..1b583f2
--- /dev/null
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/marvell,cn10624-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Global Timer (GTI) system watchdog
+
+maintainers:
+  - Bharat Bhushan <bbhushan2@marvell.com>
+
+allOf:
+  - $ref: watchdog.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - marvell,cn9670-wdt
+          - marvell,cn10624-wdt
+
+      - items:
+          - enum:
+              - marvell,cn9880-wdt
+              - marvell,cnf9535-wdt
+          - const: marvell,cn9670-wdt
+
+      - items:
+          - enum:
+              - marvell,cn10308-wdt
+              - marvell,cnf10518-wdt
+          - const: marvell,cn10624-wdt
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: refclk
+
+  marvell,wdt-timer-index:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 63
+    description:
+      An SoC has many timers (up to 64). Firmware can reserve one or more
+      timers for some other use case and configures one of the global timers
+      as the watchdog timer. Firmware will update this field with the number
+      of the timer configured as the watchdog timer.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        watchdog@802000040000 {
+            compatible = "marvell,cn9670-wdt";
+            reg = <0x00008020 0x00040000 0x00000000 0x00020000>;
+            interrupts = <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>;
+            clocks = <&sclk>;
+            clock-names = "refclk";
+            marvell,wdt-timer-index = <63>;
+        };
+    };
+
+...
index 6d0fe6a..5046dfa 100644 (file)
@@ -18,6 +18,7 @@ properties:
       - items:
           - enum:
               - qcom,kpss-wdt-ipq4019
+              - qcom,apss-wdt-ipq5018
               - qcom,apss-wdt-ipq5332
               - qcom,apss-wdt-ipq9574
               - qcom,apss-wdt-msm8994
index fc55321..62ddc28 100644 (file)
@@ -34,6 +34,20 @@ properties:
   power-domains:
     maxItems: 1
 
+  memory-region:
+    maxItems: 1
+    description:
+      Contains the watchdog reserved memory. It is optional.
+      In the reserved memory, the specified values, which are
+      PON_REASON_SOF_NUM(0xBBBBCCCC), PON_REASON_MAGIC_NUM(0xDDDDDDDD),
+      and PON_REASON_EOF_NUM(0xCCCCBBBB), are pre-stored at the first
+      3 * 4 bytes to tell that last boot was caused by watchdog reset.
+      Once the PON reason is captured by driver(rti_wdt.c), the driver
+      is supposed to wipe the whole memory region. If this property
+      is set, at least 12 bytes of reserved memory starting at the
+      specified memory address (0xa2200000) must be provided. Refer to
+      the example for details.
+
 required:
   - compatible
   - reg
@@ -47,7 +61,18 @@ examples:
     /*
      * RTI WDT in main domain on J721e SoC. Assigned clocks are used to
      * select the source clock for the watchdog, forcing it to tick with
-     * a 32kHz clock in this case.
+     * a 32kHz clock in this case. Add a reserved memory (optional) to keep
+     * the watchdog reset cause persistent, which is written in 12 bytes
+     * starting from 0xa2200000 by RTI Watchdog Firmware, making it
+     * possible to get the watchdog reset cause in the driver.
+     *
+     * Reserved memory should be defined as follows:
+     * reserved-memory {
+     *     wdt_reset_memory_region: wdt-memory@a2200000 {
+     *         reg = <0x00 0xa2200000 0x00 0x1000>;
+     *         no-map;
+     *     };
+     * };
      */
     #include <dt-bindings/soc/ti,sci_pm_domain.h>
 
@@ -58,4 +83,5 @@ examples:
         power-domains = <&k3_pds 252 TI_SCI_PD_EXCLUSIVE>;
         assigned-clocks = <&k3_clks 252 1>;
         assigned-clock-parents = <&k3_clks 252 5>;
+        memory-region = <&wdt_reset_memory_region>;
     };
index 311af51..5da27a7 100644 (file)
@@ -32,22 +32,6 @@ register blocks.
 :c:type:`struct ata_port_operations <ata_port_operations>`
 ----------------------------------------------------------
 
-Disable ATA port
-~~~~~~~~~~~~~~~~
-
-::
-
-    void (*port_disable) (struct ata_port *);
-
-
-Called from :c:func:`ata_bus_probe` error path, as well as when unregistering
-from the SCSI module (rmmod, hot unplug). This function should do
-whatever needs to be done to take the port out of use. In most cases,
-:c:func:`ata_port_disable` can be used as this hook.
-
-Called from :c:func:`ata_bus_probe` on a failed probe. Called from
-:c:func:`ata_scsi_release`.
-
 Post-IDENTIFY device configuration
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -274,14 +258,6 @@ Exception and probe handling (EH)
 
 ::
 
-    void (*eng_timeout) (struct ata_port *ap);
-    void (*phy_reset) (struct ata_port *ap);
-
-
-Deprecated. Use ``->error_handler()`` instead.
-
-::
-
     void (*freeze) (struct ata_port *ap);
     void (*thaw) (struct ata_port *ap);
 
@@ -364,8 +340,7 @@ SATA phy read/write
                        u32 val);
 
 
-Read and write standard SATA phy registers. Currently only used if
-``->phy_reset`` hook called the :c:func:`sata_phy_reset` helper function.
+Read and write standard SATA phy registers.
 sc_reg is one of SCR_STATUS, SCR_CONTROL, SCR_ERROR, or SCR_ACTIVE.
 
 Init and shutdown
@@ -536,13 +511,12 @@ to return without deallocating the qc. This leads us to
 
 :c:func:`ata_scsi_error` is the current ``transportt->eh_strategy_handler()``
 for libata. As discussed above, this will be entered in two cases -
-timeout and ATAPI error completion. This function calls low level libata
-driver's :c:func:`eng_timeout` callback, the standard callback for which is
-:c:func:`ata_eng_timeout`. It checks if a qc is active and calls
-:c:func:`ata_qc_timeout` on the qc if so. Actual error handling occurs in
-:c:func:`ata_qc_timeout`.
+timeout and ATAPI error completion. This function will check if a qc is active
+and has not failed yet. Such a qc will be marked with AC_ERR_TIMEOUT such that
+EH will know to handle it later. Then it calls low level libata driver's
+:c:func:`error_handler` callback.
 
-If EH is invoked for timeout, :c:func:`ata_qc_timeout` stops BMDMA and
+When the :c:func:`error_handler` callback is invoked it stops BMDMA and
 completes the qc. Note that as we're currently in EH, we cannot call
 scsi_done. As described in SCSI EH doc, a recovered scmd should be
 either retried with :c:func:`scsi_queue_insert` or finished with
index bf0124f..c4581c2 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
index ffcc9f2..de84cef 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
index 958498f..5e91ec7 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: |  ok  |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
index 76ce938..085f309 100644 (file)
@@ -57,6 +57,16 @@ a snapshot on any subdirectory (and its nested contents) in the
 system.  Snapshot creation and deletion are as simple as 'mkdir
 .snap/foo' and 'rmdir .snap/foo'.
 
+Snapshot names have two limitations:
+
+* They can not start with an underscore ('_'), as these names are reserved
+  for internal usage by the MDS.
+* They can not exceed 240 characters in size.  This is because the MDS makes
+  use of long snapshot names internally, which follow the format:
+  `_<SNAPSHOT-NAME>_<INODE-NUMBER>`.  Since filenames in general can't have
+  more than 255 characters, and `<INODE-NUMBER>` takes 13 characters, the
+  `<SNAPSHOT-NAME>` part can take at most 255 - 1 - 1 - 13 = 240 characters.
+
 Ceph also provides some recursive accounting on directories for nested
 files and bytes.  That is, a 'getfattr -d foo' on any directory in the
 system will reveal the total number of nested regular files and
index bec25c8..8a58429 100644 (file)
@@ -20,8 +20,7 @@ The gl_holders list contains all the queued lock requests (not
 just the holders) associated with the glock. If there are any
 held locks, then they will be contiguous entries at the head
 of the list. Locks are granted in strictly the order that they
-are queued, except for those marked LM_FLAG_PRIORITY which are
-used only during recovery, and even then only for journal locks.
+are queued.
 
 There are three lock states that users of the glock layer can request,
 namely shared (SH), deferred (DF) and exclusive (EX). Those translate
index 1f96155..2b59cff 100644 (file)
@@ -461,6 +461,7 @@ Memory Area, or VMA) there is a series of lines such as the following::
     Private_Dirty:         0 kB
     Referenced:          892 kB
     Anonymous:             0 kB
+    KSM:                   0 kB
     LazyFree:              0 kB
     AnonHugePages:         0 kB
     ShmemPmdMapped:        0 kB
@@ -501,6 +502,9 @@ accessed.
 a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE
 and a page is modified, the file page is replaced by a private anonymous copy.
 
+"KSM" reports how many of the pages are KSM pages. Note that KSM-placed zeropages
+are not included, only actual KSM pages.
+
 "LazyFree" shows the amount of memory which is marked by madvise(MADV_FREE).
 The memory isn't freed immediately with madvise(). It's freed in memory
 pressure if the memory is clean. Please note that the printed value might
index be131e9..4321c38 100644 (file)
@@ -11,19 +11,19 @@ via sysfs
 product_name
 ------------
 
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
    :doc: product_name
 
 product_number
 --------------
 
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
-   :doc: product_name
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+   :doc: product_number
 
 serial_number
 -------------
 
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
    :doc: serial_number
 
 unique_id
index 3ee89df..c946eb4 100644 (file)
@@ -56,6 +56,15 @@ KCONFIG_OVERWRITECONFIG
 If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
 break symlinks when .config is a symlink to somewhere else.
 
+KCONFIG_WARN_UNKNOWN_SYMBOLS
+----------------------------
+This environment variable makes Kconfig warn about all unrecognized
+symbols in the config input.
+
+KCONFIG_WERROR
+--------------
+If set, Kconfig treats warnings as errors.
+
 `CONFIG_`
 ---------
 If you set `CONFIG_` in the environment, Kconfig will prefix all symbols
@@ -212,6 +221,10 @@ Searching in menuconfig:
        first (and in alphabetical order), then come all other symbols,
        sorted in alphabetical order.
 
+       In this menu, pressing the key in the (#) prefix will jump
+       directly to that location. You will be returned to the current
+       search results after exiting this new menu.
+
 ----------------------------------------------------------------------
 
 User interface options for 'menuconfig'
@@ -264,6 +277,10 @@ Searching in nconfig:
        F8 (SymSearch) searches the configuration symbols for the
        given string or regular expression (regex).
 
+       In the SymSearch, pressing the key in the (#) prefix will
+       jump directly to that location. You will be returned to the
+       current search results after exiting this new menu.
+
 NCONFIG_MODE
 ------------
 This mode shows all sub-menus in one large tree.
index c3851fe..b1d97fa 100644 (file)
@@ -25,50 +25,38 @@ objects <https://www.aosabook.org/en/llvm.html>`_. Clang is a front-end to LLVM
 that supports C and the GNU C extensions required by the kernel, and is
 pronounced "klang," not "see-lang."
 
-Clang
------
-
-The compiler used can be swapped out via ``CC=`` command line argument to ``make``.
-``CC=`` should be set when selecting a config and during a build. ::
-
-       make CC=clang defconfig
-
-       make CC=clang
+Building with LLVM
+------------------
 
-Cross Compiling
----------------
+Invoke ``make`` via::
 
-A single Clang compiler binary will typically contain all supported backends,
-which can help simplify cross compiling. ::
-
-       make ARCH=arm64 CC=clang CROSS_COMPILE=aarch64-linux-gnu-
+       make LLVM=1
 
-``CROSS_COMPILE`` is not used to prefix the Clang compiler binary, instead
-``CROSS_COMPILE`` is used to set a command line flag: ``--target=<triple>``. For
-example: ::
+to compile for the host target. For cross compiling::
 
-       clang --target=aarch64-linux-gnu foo.c
+       make LLVM=1 ARCH=arm64
 
-LLVM Utilities
---------------
+The LLVM= argument
+------------------
 
-LLVM has substitutes for GNU binutils utilities. They can be enabled individually.
-The full list of supported make variables::
+LLVM has substitutes for GNU binutils utilities. They can be enabled
+individually. The full list of supported make variables::
 
        make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \
          OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \
          HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld
 
-To simplify the above command, Kbuild supports the ``LLVM`` variable::
-
-       make LLVM=1
+``LLVM=1`` expands to the above.
 
 If your LLVM tools are not available in your PATH, you can supply their
 location using the LLVM variable with a trailing slash::
 
        make LLVM=/path/to/llvm/
 
-which will use ``/path/to/llvm/clang``, ``/path/to/llvm/ld.lld``, etc.
+which will use ``/path/to/llvm/clang``, ``/path/to/llvm/ld.lld``, etc. The
+following may also be used::
+
+       PATH=/path/to/llvm:$PATH make LLVM=1
 
 If your LLVM tools have a version suffix and you want to test with that
 explicit version rather than the unsuffixed executables like ``LLVM=1``, you
@@ -78,31 +66,72 @@ can pass the suffix using the ``LLVM`` variable::
 
 which will use ``clang-14``, ``ld.lld-14``, etc.
 
+To support combinations of out of tree paths with version suffixes, we
+recommend::
+
+       PATH=/path/to/llvm/:$PATH make LLVM=-14
+
 ``LLVM=0`` is not the same as omitting ``LLVM`` altogether, it will behave like
-``LLVM=1``. If you only wish to use certain LLVM utilities, use their respective
-make variables.
+``LLVM=1``. If you only wish to use certain LLVM utilities, use their
+respective make variables.
+
+The same value used for ``LLVM=`` should be set for each invocation of ``make``
+if configuring and building via distinct commands. ``LLVM=`` should also be set
+as an environment variable when running scripts that will eventually run
+``make``.
 
-The integrated assembler is enabled by default. You can pass ``LLVM_IAS=0`` to
-disable it.
+Cross Compiling
+---------------
 
-Omitting CROSS_COMPILE
+A single Clang compiler binary (and corresponding LLVM utilities) will
+typically contain all supported back ends, which can help simplify cross
+compiling especially when ``LLVM=1`` is used. If you use only LLVM tools,
+``CROSS_COMPILE`` or target-triple-prefixes become unnecessary. Example::
+
+       make LLVM=1 ARCH=arm64
+
+As an example of mixing LLVM and GNU utilities, for a target like ``ARCH=s390``
+which does not yet have ``ld.lld`` or ``llvm-objcopy`` support, you could
+invoke ``make`` via::
+
+       make LLVM=1 ARCH=s390 LD=s390x-linux-gnu-ld.bfd \
+         OBJCOPY=s390x-linux-gnu-objcopy
+
+This example will invoke ``s390x-linux-gnu-ld.bfd`` as the linker and
+``s390x-linux-gnu-objcopy``, so ensure those are reachable in your ``$PATH``.
+
+``CROSS_COMPILE`` is not used to prefix the Clang compiler binary (or
+corresponding LLVM utilities) as is the case for GNU utilities when ``LLVM=1``
+is not set.
+
+The LLVM_IAS= argument
 ----------------------
 
-As explained above, ``CROSS_COMPILE`` is used to set ``--target=<triple>``.
+Clang can assemble assembler code. You can pass ``LLVM_IAS=0`` to disable this
+behavior and have Clang invoke the corresponding non-integrated assembler
+instead. Example::
+
+       make LLVM=1 LLVM_IAS=0
+
+``CROSS_COMPILE`` is necessary when cross compiling and ``LLVM_IAS=0``
+is used in order to set ``--prefix=`` for the compiler to find the
+corresponding non-integrated assembler (typically, you don't want to use the
+system assembler when targeting another architecture). Example::
 
-If ``CROSS_COMPILE`` is not specified, the ``--target=<triple>`` is inferred
-from ``ARCH``.
+       make LLVM=1 ARCH=arm LLVM_IAS=0 CROSS_COMPILE=arm-linux-gnueabi-
 
-That means if you use only LLVM tools, ``CROSS_COMPILE`` becomes unnecessary.
 
-For example, to cross-compile the arm64 kernel::
+Ccache
+------
 
-       make ARCH=arm64 LLVM=1
+``ccache`` can be used with ``clang`` to improve subsequent builds (though
+KBUILD_BUILD_TIMESTAMP_ should be set to a deterministic value between builds
+in order to avoid 100% cache misses, see Reproducible_builds_ for more info)::
 
-If ``LLVM_IAS=0`` is specified, ``CROSS_COMPILE`` is also used to derive
-``--prefix=<path>`` to search for the GNU assembler and linker. ::
+       KBUILD_BUILD_TIMESTAMP='' make LLVM=1 CC="ccache clang"
 
-       make ARCH=arm64 LLVM=1 LLVM_IAS=0 CROSS_COMPILE=aarch64-linux-gnu-
+.. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp
+.. _Reproducible_builds: reproducible-builds.html#timestamps
 
 Supported Architectures
 -----------------------
@@ -135,14 +164,17 @@ yet. Bug reports are always welcome at the issue tracker below!
    * - hexagon
      - Maintained
      - ``LLVM=1``
+   * - loongarch
+     - Maintained
+     - ``LLVM=1``
    * - mips
      - Maintained
      - ``LLVM=1``
    * - powerpc
      - Maintained
-     - ``CC=clang``
+     - ``LLVM=1``
    * - riscv
-     - Maintained
+     - Supported
      - ``LLVM=1``
    * - s390
      - Maintained
@@ -171,7 +203,11 @@ Getting Help
 Getting LLVM
 -------------
 
-We provide prebuilt stable versions of LLVM on `kernel.org <https://kernel.org/pub/tools/llvm/>`_.
+We provide prebuilt stable versions of LLVM on `kernel.org
+<https://kernel.org/pub/tools/llvm/>`_. These have been optimized with profile
+data for building Linux kernels, which should improve kernel build times
+relative to other distributions of LLVM.
+
 Below are links that may be useful for building LLVM from source or procuring
 it through a distribution's package manager.
 
index c1c732e..09dcf63 100644 (file)
@@ -98,7 +98,7 @@ If you aren't subscribed to netdev and/or are simply unsure if
 repository link above for any new networking-related commits.  You may
 also check the following website for the current status:
 
-  https://patchwork.hopto.org/net-next.html
+  https://netdev.bots.linux.dev/net-next.html
 
 The ``net`` tree continues to collect fixes for the vX.Y content, and is
 fed back to Linus at regular (~weekly) intervals.  Meaning that the
@@ -120,7 +120,37 @@ queue for netdev:
   https://patchwork.kernel.org/project/netdevbpf/list/
 
 The "State" field will tell you exactly where things are at with your
-patch. Patches are indexed by the ``Message-ID`` header of the emails
+patch:
+
+================== =============================================================
+Patch state        Description
+================== =============================================================
+New, Under review  pending review, patch is in the maintainer’s queue for
+                   review; the two states are used interchangeably (depending on
+                   the exact co-maintainer handling patchwork at the time)
+Accepted           patch was applied to the appropriate networking tree, this is
+                   usually set automatically by the pw-bot
+Needs ACK          waiting for an ack from an area expert or testing
+Changes requested  patch has not passed the review, new revision is expected
+                   with appropriate code and commit message changes
+Rejected           patch has been rejected and new revision is not expected
+Not applicable     patch is expected to be applied outside of the networking
+                   subsystem
+Awaiting upstream  patch should be reviewed and handled by appropriate
+                   sub-maintainer, who will send it on to the networking trees;
+                   patches set to ``Awaiting upstream`` in netdev's patchwork
+                   will usually remain in this state, whether the sub-maintainer
+                   requested changes, accepted or rejected the patch
+Deferred           patch needs to be reposted later, usually due to dependency
+                   or because it was posted for a closed tree
+Superseded         new version of the patch was posted, usually set by the
+                   pw-bot
+RFC                not to be applied, usually not in maintainer’s review queue,
+                   pw-bot can automatically set patches to this state based
+                   on subject tags
+================== =============================================================
+
+Patches are indexed by the ``Message-ID`` header of the emails
 which carried them so if you have trouble finding your patch append
 the value of ``Message-ID`` to the URL above.
 
@@ -155,7 +185,7 @@ must match the MAINTAINERS entry) and a handful of senior reviewers.
 
 Bot records its activity here:
 
-  https://patchwork.hopto.org/pw-bot.html
+  https://netdev.bots.linux.dev/pw-bot.html
 
 Review timelines
 ~~~~~~~~~~~~~~~~
index 20eff96..a52996b 100644 (file)
@@ -87,13 +87,12 @@ The following keys are defined:
     emulated via software, either in or below the kernel.  These accesses are
     always extremely slow.
 
-  * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
-    in hardware, but are slower than the corresponding aligned accesses
-    sequences.
+  * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower
+    than equivalent byte accesses.  Misaligned accesses may be supported
+    directly in hardware, or trapped and emulated by software.
 
-  * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
-    in hardware and are faster than the corresponding aligned accesses
-    sequences.
+  * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster
+    than equivalent byte accesses.
 
   * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
     not supported at all and will generate a misaligned address fault.
index 05ef904..8fdb20c 100644 (file)
@@ -42,7 +42,7 @@ KASAN有三种模式:
 体系架构
 ~~~~~~~~
 
-在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN,
+在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN,
 而基于标签的KASAN模式只在arm64上支持。
 
 编译器
index af94e71..7b1d401 100644 (file)
@@ -528,6 +528,8 @@ families may, however, require a larger buffer. 32kB buffer is recommended
 for most efficient handling of dumps (larger buffer fits more dumped
 objects and therefore fewer recvmsg() calls are needed).
 
+.. _classic_netlink:
+
 Classic Netlink
 ===============
 
index 73db30c..21a7578 100644 (file)
@@ -2259,6 +2259,8 @@ Errors:
   EINVAL   invalid register ID, or no such register or used with VMs in
            protected virtualization mode on s390
   EPERM    (arm64) register access not allowed before vcpu finalization
+  EBUSY    (riscv) changing register value not allowed after the vcpu
+           has run at least once
   ======   ============================================================
 
 (These error codes are indicative only: do not rely on a specific error
@@ -3499,7 +3501,7 @@ VCPU matching underlying host.
 ---------------------
 
 :Capability: basic
-:Architectures: arm64, mips
+:Architectures: arm64, mips, riscv
 :Type: vcpu ioctl
 :Parameters: struct kvm_reg_list (in/out)
 :Returns: 0 on success; -1 on error
index 1cc0be8..42459a3 100644 (file)
@@ -6118,7 +6118,7 @@ F:        include/video/udlfb.h
 DISTRIBUTED LOCK MANAGER (DLM)
 M:     Christine Caulfield <ccaulfie@redhat.com>
 M:     David Teigland <teigland@redhat.com>
-L:     cluster-devel@redhat.com
+L:     gfs2@lists.linux.dev
 S:     Supported
 W:     http://sources.redhat.com/cluster/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm.git
@@ -8774,7 +8774,7 @@ F:        scripts/get_maintainer.pl
 GFS2 FILE SYSTEM
 M:     Bob Peterson <rpeterso@redhat.com>
 M:     Andreas Gruenbacher <agruenba@redhat.com>
-L:     cluster-devel@redhat.com
+L:     gfs2@lists.linux.dev
 S:     Supported
 B:     https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=gfs2
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
@@ -11382,6 +11382,7 @@ F:      scripts/dummy-tools/
 F:     scripts/mk*
 F:     scripts/mod/
 F:     scripts/package/
+F:     usr/
 
 KERNEL HARDENING (not covered by other areas)
 M:     Kees Cook <keescook@chromium.org>
@@ -11588,6 +11589,8 @@ F:      arch/x86/include/uapi/asm/svm.h
 F:     arch/x86/include/uapi/asm/vmx.h
 F:     arch/x86/kvm/
 F:     arch/x86/kvm/*/
+F:     tools/testing/selftests/kvm/*/x86_64/
+F:     tools/testing/selftests/kvm/x86_64/
 
 KERNFS
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
@@ -12390,6 +12393,14 @@ S:     Maintained
 F:     Documentation/devicetree/bindings/pinctrl/loongson,ls2k-pinctrl.yaml
 F:     drivers/pinctrl/pinctrl-loongson2.c
 
+LOONGSON-2 SOC SERIES THERMAL DRIVER
+M:     zhanghongchen <zhanghongchen@loongson.cn>
+M:     Yinbo Zhu <zhuyinbo@loongson.cn>
+L:     linux-pm@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/thermal/loongson,ls2k-thermal.yaml
+F:     drivers/thermal/loongson2_thermal.c
+
 LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
 M:     Sathya Prakash <sathya.prakash@broadcom.com>
 M:     Sreekanth Reddy <sreekanth.reddy@broadcom.com>
@@ -13734,6 +13745,7 @@ F:      include/linux/memory_hotplug.h
 F:     include/linux/mm.h
 F:     include/linux/mmzone.h
 F:     include/linux/pagewalk.h
+F:     include/linux/rmap.h
 F:     include/trace/events/ksm.h
 F:     mm/
 F:     tools/mm/
@@ -16751,6 +16763,8 @@ L:      linux-kernel@vger.kernel.org
 S:     Supported
 W:     https://perf.wiki.kernel.org/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools.git perf-tools
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git perf-tools-next
 F:     arch/*/events/*
 F:     arch/*/events/*/*
 F:     arch/*/include/asm/perf_event.h
@@ -17176,9 +17190,9 @@ F:      kernel/sched/psi.c
 
 PRINTK
 M:     Petr Mladek <pmladek@suse.com>
-M:     Sergey Senozhatsky <senozhatsky@chromium.org>
 R:     Steven Rostedt <rostedt@goodmis.org>
 R:     John Ogness <john.ogness@linutronix.de>
+R:     Sergey Senozhatsky <senozhatsky@chromium.org>
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
 F:     include/linux/printk.h
@@ -18072,7 +18086,6 @@ T:      git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
 F:     Documentation/admin-guide/rtc.rst
 F:     Documentation/devicetree/bindings/rtc/
 F:     drivers/rtc/
-F:     include/linux/platform_data/rtc-*
 F:     include/linux/rtc.h
 F:     include/linux/rtc/
 F:     include/uapi/linux/rtc.h
@@ -20395,6 +20408,13 @@ S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
 F:     drivers/staging/
 
+STANDALONE CACHE CONTROLLER DRIVERS
+M:     Conor Dooley <conor@kernel.org>
+L:     linux-riscv@lists.infradead.org
+S:     Maintained
+T:     git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
+F:     drivers/cache
+
 STARFIRE/DURALAN NETWORK DRIVER
 M:     Ion Badulescu <ionut@badula.org>
 S:     Odd Fixes
@@ -21233,7 +21253,7 @@ F:      sound/soc/ti/
 TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIVERS
 M:     Shenghao Ding <shenghao-ding@ti.com>
 M:     Kevin Lu <kevin-lu@ti.com>
-M:     Baojun Xu <x1077012@ti.com>
+M:     Baojun Xu <baojun.xu@ti.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/sound/tas2552.txt
@@ -21796,6 +21816,20 @@ S:     Maintained
 F:     Documentation/hwmon/tps546d24.rst
 F:     drivers/hwmon/pmbus/tps546d24.c
 
+TQ SYSTEMS BOARD & DRIVER SUPPORT
+L:     linux@ew.tq-group.com
+S:     Supported
+W:     https://www.tq-group.com/en/products/tq-embedded/
+F:     arch/arm/boot/dts/imx*mba*.dts*
+F:     arch/arm/boot/dts/imx*tqma*.dts*
+F:     arch/arm/boot/dts/mba*.dtsi
+F:     arch/arm64/boot/dts/freescale/imx*mba*.dts*
+F:     arch/arm64/boot/dts/freescale/imx*tqma*.dts*
+F:     arch/arm64/boot/dts/freescale/mba*.dtsi
+F:     drivers/gpio/gpio-tqmx86.c
+F:     drivers/mfd/tqmx86.c
+F:     drivers/watchdog/tqmx86_wdt.c
+
 TRACING
 M:     Steven Rostedt <rostedt@goodmis.org>
 M:     Masami Hiramatsu <mhiramat@kernel.org>
@@ -23072,9 +23106,9 @@ F:      drivers/net/vrf.c
 VSPRINTF
 M:     Petr Mladek <pmladek@suse.com>
 M:     Steven Rostedt <rostedt@goodmis.org>
-M:     Sergey Senozhatsky <senozhatsky@chromium.org>
 R:     Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 R:     Rasmus Villemoes <linux@rasmusvillemoes.dk>
+R:     Sergey Senozhatsky <senozhatsky@chromium.org>
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux.git
 F:     Documentation/core-api/printk-formats.rst
index 4f283d9..73f23fa 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -280,8 +280,8 @@ no-dot-config-targets := $(clean-targets) \
 # Installation targets should not require compiler. Unfortunately, vdso_install
 # is an exception where build artifacts may be updated. This must be fixed.
 no-compiler-targets := $(no-dot-config-targets) install dtbs_install \
-                       headers_install modules_install kernelrelease image_name
-no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \
+                       headers_install modules_install modules_sign kernelrelease image_name
+no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \
                          image_name
 single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.symtypes %/
 
@@ -510,7 +510,6 @@ LEX         = flex
 YACC           = bison
 AWK            = awk
 INSTALLKERNEL  := installkernel
-DEPMOD         = depmod
 PERL           = perl
 PYTHON3                = python3
 CHECK          = sparse
@@ -564,14 +563,6 @@ KBUILD_CFLAGS += -funsigned-char
 KBUILD_CFLAGS += -fno-common
 KBUILD_CFLAGS += -fno-PIE
 KBUILD_CFLAGS += -fno-strict-aliasing
-KBUILD_CFLAGS += -Wall
-KBUILD_CFLAGS += -Wundef
-KBUILD_CFLAGS += -Werror=implicit-function-declaration
-KBUILD_CFLAGS += -Werror=implicit-int
-KBUILD_CFLAGS += -Werror=return-type
-KBUILD_CFLAGS += -Werror=strict-prototypes
-KBUILD_CFLAGS += -Wno-format-security
-KBUILD_CFLAGS += -Wno-trigraphs
 
 KBUILD_CPPFLAGS := -D__KERNEL__
 KBUILD_RUSTFLAGS := $(rust_common_flags) \
@@ -824,10 +815,6 @@ endif # may-sync-config
 endif # need-config
 
 KBUILD_CFLAGS  += -fno-delete-null-pointer-checks
-KBUILD_CFLAGS  += $(call cc-disable-warning,frame-address,)
-KBUILD_CFLAGS  += $(call cc-disable-warning, format-truncation)
-KBUILD_CFLAGS  += $(call cc-disable-warning, format-overflow)
-KBUILD_CFLAGS  += $(call cc-disable-warning, address-of-packed-member)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
 KBUILD_CFLAGS += -O2
@@ -858,40 +845,15 @@ ifdef CONFIG_READABLE_ASM
 KBUILD_CFLAGS += -fno-reorder-blocks -fno-ipa-cp-clone -fno-partial-inlining
 endif
 
-ifneq ($(CONFIG_FRAME_WARN),0)
-KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN)
-endif
-
 stackp-flags-y                                    := -fno-stack-protector
 stackp-flags-$(CONFIG_STACKPROTECTOR)             := -fstack-protector
 stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG)      := -fstack-protector-strong
 
 KBUILD_CFLAGS += $(stackp-flags-y)
 
-KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
-KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
-KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
-
 KBUILD_RUSTFLAGS-$(CONFIG_WERROR) += -Dwarnings
 KBUILD_RUSTFLAGS += $(KBUILD_RUSTFLAGS-y)
 
-ifdef CONFIG_CC_IS_CLANG
-# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.
-KBUILD_CFLAGS += -Wno-gnu
-else
-
-# gcc inanely warns about local variables called 'main'
-KBUILD_CFLAGS += -Wno-main
-endif
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
-
-# These result in bogus false positives
-KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
-
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
 KBUILD_RUSTFLAGS += -Cforce-frame-pointers=y
@@ -1026,51 +988,12 @@ endif
 # arch Makefile may override CC so keep this after arch Makefile is included
 NOSTDINC_FLAGS += -nostdinc
 
-# Variable Length Arrays (VLAs) should not be used anywhere in the kernel
-KBUILD_CFLAGS += -Wvla
-
-# disable pointer signed / unsigned warnings in gcc 4.0
-KBUILD_CFLAGS += -Wno-pointer-sign
-
-# In order to make sure new function cast mismatches are not introduced
-# in the kernel (to avoid tripping CFI checking), the kernel should be
-# globally built with -Wcast-function-type.
-KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type)
-
 # To gain proper coverage for CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE,
 # the kernel uses only C99 flexible arrays for dynamically sized trailing
 # arrays. Enforce this for everything that may examine structure sizes and
 # perform bounds checking.
 KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3)
 
-# disable stringop warnings in gcc 8+
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
-
-# We'll want to enable this eventually, but it's not going away for 5.7 at least
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
-
-# Another good warning that we'll want to enable eventually
-KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
-
-# Enabled with W=2, disabled by default as noisy
-ifdef CONFIG_CC_IS_GCC
-KBUILD_CFLAGS += -Wno-maybe-uninitialized
-endif
-
-# The allocators already balk at large sizes, so silence the compiler
-# warnings for bounds checks involving those possible values. While
-# -Wno-alloc-size-larger-than would normally be used here, earlier versions
-# of gcc (<9.1) weirdly don't handle the option correctly when _other_
-# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX
-# doesn't work (as it is documented to), silently resolving to "0" prior to
-# version 9.1 (and producing an error more recently). Numeric values larger
-# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently
-# ignored, continuing to default to PTRDIFF_MAX. So, left with no other
-# choice, we must perform a versioned check to disable this warning.
-# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au
-KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
-KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
-
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS  += -fno-strict-overflow
 
@@ -1082,15 +1005,6 @@ ifdef CONFIG_CC_IS_GCC
 KBUILD_CFLAGS   += -fconserve-stack
 endif
 
-# Prohibit date/time macros, which would make the build non-deterministic
-KBUILD_CFLAGS   += -Werror=date-time
-
-# enforce correct pointer usage
-KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
-
-# Require designated initializers for all marked structures
-KBUILD_CFLAGS   += $(call cc-option,-Werror=designated-init)
-
 # change __FILE__ to the relative path from the srctree
 KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 
@@ -1548,44 +1462,8 @@ modules: modules_prepare
 modules_prepare: prepare
        $(Q)$(MAKE) $(build)=scripts scripts/module.lds
 
-export modules_sign_only :=
-
-ifeq ($(CONFIG_MODULE_SIG),y)
-PHONY += modules_sign
-modules_sign: modules_install
-       @:
-
-# modules_sign is a subset of modules_install.
-# 'make modules_install modules_sign' is equivalent to 'make modules_install'.
-ifeq ($(filter modules_install,$(MAKECMDGOALS)),)
-modules_sign_only := y
-endif
-endif
-
 endif # CONFIG_MODULES
 
-modinst_pre :=
-ifneq ($(filter modules_install,$(MAKECMDGOALS)),)
-modinst_pre := __modinst_pre
-endif
-
-modules_install: $(modinst_pre)
-PHONY += __modinst_pre
-__modinst_pre:
-       @rm -rf $(MODLIB)/kernel
-       @rm -f $(MODLIB)/source
-       @mkdir -p $(MODLIB)
-ifdef CONFIG_MODULES
-       @ln -s $(abspath $(srctree)) $(MODLIB)/source
-       @if [ ! $(objtree) -ef  $(MODLIB)/build ]; then \
-               rm -f $(MODLIB)/build ; \
-               ln -s $(CURDIR) $(MODLIB)/build ; \
-       fi
-       @sed 's:^\(.*\)\.o$$:kernel/\1.ko:' modules.order > $(MODLIB)/modules.order
-endif
-       @cp -f modules.builtin $(MODLIB)/
-       @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
-
 ###
 # Cleaning is done on three levels.
 # make clean     Delete most generated files
@@ -1594,7 +1472,7 @@ endif
 # make distclean Remove editor backup files, patch leftover files and the like
 
 # Directories & files removed with 'make clean'
-CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \
+CLEAN_FILES += vmlinux.symvers modules-only.symvers \
               modules.builtin modules.builtin.modinfo modules.nsdeps \
               compile_commands.json .thinlto-cache rust/test rust/doc \
               rust-project.json .vmlinux.objs .vmlinux.export.c
@@ -1608,7 +1486,7 @@ MRPROPER_FILES += include/config include/generated          \
                  certs/signing_key.pem \
                  certs/x509.genkey \
                  vmlinux-gdb.py \
-                 *.spec rpmbuild \
+                 kernel.spec rpmbuild \
                  rust/libmacros.so
 
 # clean - Delete most, but leave enough to build external modules
@@ -1675,7 +1553,6 @@ help:
        @echo  '  mrproper        - Remove all generated files + config + various backup files'
        @echo  '  distclean       - mrproper + remove editor backup and patch files'
        @echo  ''
-       @echo  'Configuration targets:'
        @$(MAKE) -f $(srctree)/scripts/kconfig/Makefile help
        @echo  ''
        @echo  'Other generic targets:'
@@ -1923,19 +1800,39 @@ help:
        @echo  '  rust-analyzer   - generate rust-project.json rust-analyzer support file'
        @echo  ''
 
+ifndef CONFIG_MODULES
+modules modules_install: __external_modules_error
 __external_modules_error:
        @echo >&2 '***'
        @echo >&2 '*** The present kernel disabled CONFIG_MODULES.'
        @echo >&2 '*** You cannot build or install external modules.'
        @echo >&2 '***'
        @false
+endif
 
 endif # KBUILD_EXTMOD
 
 # ---------------------------------------------------------------------------
 # Modules
 
-PHONY += modules modules_install modules_prepare
+PHONY += modules modules_install modules_sign modules_prepare
+
+modules_install:
+       $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst \
+       sign-only=$(if $(filter modules_install,$(MAKECMDGOALS)),,y)
+
+ifeq ($(CONFIG_MODULE_SIG),y)
+# modules_sign is a subset of modules_install.
+# 'make modules_install modules_sign' is equivalent to 'make modules_install'.
+modules_sign: modules_install
+       @:
+else
+modules_sign:
+       @echo >&2 '***'
+       @echo >&2 '*** CONFIG_MODULE_SIG is disabled. You cannot sign modules.'
+       @echo >&2 '***'
+       @false
+endif
 
 ifdef CONFIG_MODULES
 
@@ -1953,22 +1850,9 @@ PHONY += modules_check
 modules_check: $(MODORDER)
        $(Q)$(CONFIG_SHELL) $(srctree)/scripts/modules-check.sh $<
 
-quiet_cmd_depmod = DEPMOD  $(MODLIB)
-      cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
-                   $(KERNELRELEASE)
-
-modules_install:
-       $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
-       $(call cmd,depmod)
-
 else # CONFIG_MODULES
 
-# Modules not configured
-# ---------------------------------------------------------------------------
-
-PHONY += __external_modules_error
-
-modules modules_install: __external_modules_error
+modules:
        @:
 
 KBUILD_MODULES :=
@@ -2147,6 +2031,10 @@ kernelversion:
 image_name:
        @echo $(KBUILD_IMAGE)
 
+PHONY += run-command
+run-command:
+       $(Q)$(KBUILD_RUN_COMMAND)
+
 quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN   $(wildcard $(rm-files)))
       cmd_rmfiles = rm -rf $(rm-files)
 
index dd31e97..396caec 100644 (file)
@@ -3,6 +3,5 @@
 generated-y += syscall_table.h
 generic-y += agp.h
 generic-y += asm-offsets.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
index b13c4a2..36b63f2 100644 (file)
@@ -3,8 +3,8 @@
  *     arch/alpha/lib/callback_srm.S
  */
 
+#include <linux/export.h>
 #include <asm/console.h>
-#include <asm/export.h>
 
 .text
 #define HWRPB_CRB_OFFSET 0xc0
index ce02de7..af70ee3 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Zero an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .global clear_page
index db6c6ca..848eb60 100644 (file)
@@ -10,7 +10,7 @@
  * a successful copy).  There is also some rather minor exception setup
  * stuff.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* Allow an exception for an insn; exit if we get one.  */
 #define EX(x,y...)                     \
index 5439a30..1c444fd 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Copy an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .global copy_page
index 32ab034..ef18faa 100644 (file)
@@ -12,7 +12,7 @@
  * exception setup stuff..
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* Allow an exception for an insn; exit if we get one.  */
 #define EXI(x,y...)                    \
index c7b213a..273c426 100644 (file)
@@ -13,7 +13,7 @@
  * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .globl csum_ipv6_magic
        .align 4
        .ent csum_ipv6_magic
index 2b60eb4..db01840 100644 (file)
@@ -46,7 +46,7 @@
  *     $28 - compare status
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #define halt .long 0
 
 /*
index 325864c..a534d9f 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Zero an entire page.
  */
-#include <asm/export.h>
+#include <linux/export.h>
         .text
         .align 4
         .global clear_page
index 7e644f8..af776cc 100644 (file)
@@ -29,7 +29,7 @@
  *     want to leave a hole (and we also want to avoid repeating lots of work)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 /* Allow an exception for an insn; exit if we get one.  */
 #define EX(x,y...)                     \
        99: x,##y;                      \
index fd7212c..36be511 100644 (file)
@@ -57,7 +57,7 @@
    destination pages are in the dcache, but it is my guess that this is
    less important than the dcache miss case.  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .global copy_page
index f3e4337..b9b1971 100644 (file)
@@ -23,7 +23,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 /* Allow an exception for an insn; exit if we get one.  */
 #define EXI(x,y...)                    \
        99: x,##y;                      \
index 9a73f90..2ee548b 100644 (file)
@@ -53,7 +53,7 @@
  * may cause additional delay in rare cases (load-load replay traps).
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .globl csum_ipv6_magic
        .align 4
        .ent csum_ipv6_magic
index 137ff1a..b73a6d2 100644 (file)
@@ -56,7 +56,7 @@
  * Try not to change the actual algorithm if possible for consistency.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #define halt .long 0
 
 /*
index 56bf9e1..f75ba43 100644 (file)
@@ -28,7 +28,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  * Try not to change the actual algorithm if possible for consistency.
  */
-#include <asm/export.h>
+#include <linux/export.h>
         .set noreorder
         .set noat
 
index ffbd056..3ef43c2 100644 (file)
@@ -20,7 +20,7 @@
  * Temp usage notes:
  *     $1,$2,          - scratch
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noreorder
        .set noat
 
index 1cfcfbb..89d7809 100644 (file)
@@ -27,7 +27,7 @@
  * as fixes will need to be made in multiple places.  The performance gain
  * is worth it.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noat
        .set noreorder
 .text
index ec3096a..f8c7305 100644 (file)
@@ -20,7 +20,7 @@
  * string once.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
        .align 4
index fbf89e0..97a7cb4 100644 (file)
@@ -16,7 +16,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  * Try not to change the actual algorithm if possible for consistency.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
index b73106f..3d90788 100644 (file)
@@ -18,7 +18,7 @@
  *     U       - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noreorder
        .set noat
 
index ceb0ca5..8f31323 100644 (file)
@@ -21,7 +21,7 @@
  * Try not to change the actual algorithm if possible for consistency.
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
        .align 4
index 7f80e39..ae7355f 100644 (file)
@@ -19,7 +19,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
index c13d3ec..45366e3 100644 (file)
@@ -31,7 +31,7 @@ For correctness consider that:
       - only minimum number of quadwords may be accessed
       - the third argument is an unsigned long
 */
-#include <asm/export.h>
+#include <linux/export.h>
         .set noreorder
         .set noat
 
index 42d1922..3a27689 100644 (file)
@@ -7,7 +7,7 @@
  * This is hand-massaged output from the original memcpy.c.  We defer to
  * memcpy whenever possible; the backwards copy loops are not unrolled.
  */
-#include <asm/export.h>        
+#include <linux/export.h>
        .set noat
        .set noreorder
        .text
index 00393e3..9075d69 100644 (file)
@@ -14,7 +14,7 @@
  * The scheduling comments are according to the EV5 documentation (and done by 
  * hand, so they might well be incorrect, please do tell me about it..)
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noat
        .set noreorder
 .text
index 055877d..62b90eb 100644 (file)
@@ -5,7 +5,7 @@
  *
  * Append a null-terminated string from SRC to DST.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 
        .text
 
index 17871dd..68c54ff 100644 (file)
@@ -6,7 +6,7 @@
  * Return the address of a given character within a null-terminated
  * string, or null if it is not found.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
index cb74ad2..d8773ba 100644 (file)
@@ -6,7 +6,7 @@
  * Copy a null-terminated string from SRC to DST.  Return a pointer
  * to the null-terminator in the source.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
        .align 3
index dd882fe..4fc6a6f 100644 (file)
@@ -12,7 +12,7 @@
  *       do this instead of the 9 instructions that
  *       binary search needs).
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noreorder
        .set noat
 
index 522fee3..a913a7c 100644 (file)
@@ -10,7 +10,7 @@
  * past count, whereas libc may write to count+1.  This follows the generic
  * implementation in lib/string.c and is, IMHO, more sensible.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
        .align 3
index cc57fad..cb90cf0 100644 (file)
@@ -11,7 +11,7 @@
  * version has cropped that bit o' nastiness as well as assuming that
  * __stxncpy is in range of a branch.
  */
-#include <asm/export.h>
+#include <linux/export.h>
        .set noat
        .set noreorder
 
index 7650ba9..dd8e073 100644 (file)
@@ -6,7 +6,7 @@
  * Return the address of the last occurrence of a given character
  * within a null-terminated string, or null if it is not found.
  */
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
index b887aa5..96f0591 100644 (file)
@@ -25,7 +25,7 @@
  # along with GCC; see the file COPYING.  If not, write to the 
  # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
  # MA 02111-1307, USA.
-#include <asm/export.h>
+#include <linux/export.h>
 
         .set noreorder
         .set noat
index 6f4995a..3162db5 100644 (file)
@@ -27,6 +27,8 @@ config ARC
        select GENERIC_SCHED_CLOCK
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_IOREMAP
+       select GENERIC_STRNCPY_FROM_USER if MMU
+       select GENERIC_STRNLEN_USER if MMU
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4
@@ -491,11 +493,11 @@ config ARC_KVADDR_SIZE
          kernel-user gutter)
 
 config ARC_CURR_IN_REG
-       bool "Dedicate Register r25 for current_task pointer"
+       bool "cache current task pointer in gp"
        default y
        help
-         This reserved Register R25 to point to Current Task in
-         kernel mode. This saves memory access for each such access
+         This reserves gp register to point to Current Task in
+         kernel mode eliding memory access for each access
 
 
 config ARC_EMUL_UNALIGNED
index 329400a..2390dd0 100644 (file)
@@ -28,14 +28,14 @@ cflags-y                            += $(tune-mcpu-def-y)
 endif
 endif
 
-
 ifdef CONFIG_ARC_CURR_IN_REG
 # For a global register definition, make sure it gets passed to every file
 # We had a customer reported bug where some code built in kernel was NOT using
-# any kernel headers, and missing the r25 global register
+# any kernel headers, and missing the global register
 # Can't do unconditionally because of recursive include issues
 # due to <linux/thread_info.h>
 LINUXINCLUDE   +=  -include $(srctree)/arch/arc/include/asm/current.h
+cflags-y       += -ffixed-gp
 endif
 
 cflags-y                               += -fsection-anchors
@@ -67,7 +67,7 @@ cflags-$(CONFIG_ARC_DW2_UNWIND)               += -fasynchronous-unwind-tables $(cfi)
 # small data is default for elf32 tool-chain. If not usable, disable it
 # This also allows repurposing GP as scratch reg to gcc reg allocator
 disable_small_data := y
-cflags-$(disable_small_data)           += -mno-sdata -fcall-used-gp
+cflags-$(disable_small_data)           += -mno-sdata
 
 cflags-$(CONFIG_CPU_BIG_ENDIAN)                += -mbig-endian
 ldflags-$(CONFIG_CPU_BIG_ENDIAN)       += -EB
index 2162023..4b13f60 100644 (file)
@@ -23,7 +23,7 @@
 #define ARC_REG_ICCM_BUILD     0x78    /* ICCM size (common) */
 #define ARC_REG_XY_MEM_BCR     0x79
 #define ARC_REG_MAC_BCR                0x7a
-#define ARC_REG_MUL_BCR                0x7b
+#define ARC_REG_MPY_BCR                0x7b
 #define ARC_REG_SWAP_BCR       0x7c
 #define ARC_REG_NORM_BCR       0x7d
 #define ARC_REG_MIXMAX_BCR     0x7e
@@ -177,7 +177,7 @@ struct bcr_isa_arcv2 {
 #endif
 };
 
-struct bcr_uarch_build_arcv2 {
+struct bcr_uarch_build {
 #ifdef CONFIG_CPU_BIG_ENDIAN
        unsigned int pad:8, prod:8, maj:8, min:8;
 #else
@@ -185,6 +185,59 @@ struct bcr_uarch_build_arcv2 {
 #endif
 };
 
+struct bcr_mmu_3 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
+                    u_itlb:4, u_dtlb:4;
+#else
+       unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
+                    ways:4, ver:8;
+#endif
+};
+
+struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+                    n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+       /*           DTLB      ITLB      JES        JE         JA      */
+       unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+                    pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+};
+
+struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+       unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+};
+
+struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+       unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+};
+
+struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+       unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+};
+
+struct bcr_volatile {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       unsigned int start:4, limit:4, pad:22, order:1, disable:1;
+#else
+       unsigned int disable:1, order:1, pad:22, limit:4, start:4;
+#endif
+};
+
 struct bcr_mpy {
 #ifdef CONFIG_CPU_BIG_ENDIAN
        unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8;
@@ -302,48 +355,6 @@ struct bcr_generic {
 #endif
 };
 
-/*
- *******************************************************************
- * Generic structures to hold build configuration used at runtime
- */
-
-struct cpuinfo_arc_mmu {
-       unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1;
-       unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8;
-};
-
-struct cpuinfo_arc_cache {
-       unsigned int sz_k:14, line_len:8, assoc:4, alias:1, vipt:1, pad:4;
-};
-
-struct cpuinfo_arc_bpu {
-       unsigned int ver, full, num_cache, num_pred, ret_stk;
-};
-
-struct cpuinfo_arc_ccm {
-       unsigned int base_addr, sz;
-};
-
-struct cpuinfo_arc {
-       struct cpuinfo_arc_cache icache, dcache, slc;
-       struct cpuinfo_arc_mmu mmu;
-       struct cpuinfo_arc_bpu bpu;
-       struct bcr_identity core;
-       struct bcr_isa_arcv2 isa;
-       const char *release, *name;
-       unsigned int vec_base;
-       struct cpuinfo_arc_ccm iccm, dccm;
-       struct {
-               unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
-                            fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
-                            ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
-                            timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
-       } extn;
-       struct bcr_mpy extn_mpy;
-};
-
-extern struct cpuinfo_arc cpuinfo_arc700[];
-
 static inline int is_isa_arcv2(void)
 {
        return IS_ENABLED(CONFIG_ISA_ARCV2);
index 1b0ffae..5258cb8 100644 (file)
@@ -18,7 +18,7 @@ static inline void arch_atomic_##op(int i, atomic_t *v)                       \
        : [val] "=&r"   (val) /* Early clobber to prevent reg reuse */  \
        : [ctr] "r"     (&v->counter), /* Not "m": llock only supports reg direct addr mode */  \
          [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
+       : "cc", "memory");                                              \
 }                                                                      \
 
 #define ATOMIC_OP_RETURN(op, asm_op)                           \
@@ -34,7 +34,7 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)       \
        : [val] "=&r"   (val)                                           \
        : [ctr] "r"     (&v->counter),                                  \
          [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
+       : "cc", "memory");                                              \
                                                                        \
        return val;                                                     \
 }
@@ -56,7 +56,7 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)        \
          [orig] "=&r" (orig)                                           \
        : [ctr] "r"     (&v->counter),                                  \
          [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
+       : "cc", "memory");                                              \
                                                                        \
        return orig;                                                    \
 }
index 6b6db98..9b5791b 100644 (file)
@@ -60,7 +60,7 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v)           \
        "       bnz     1b              \n"                             \
        : "=&r"(val)                                                    \
        : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");                                                        \
+       : "cc", "memory");                                              \
 }                                                                      \
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)                               \
@@ -77,7 +77,7 @@ static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)   \
        "       bnz     1b              \n"                             \
        : [val] "=&r"(val)                                              \
        : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");        /* memory clobber comes from smp_mb() */        \
+       : "cc", "memory");                                              \
                                                                        \
        return val;                                                     \
 }
@@ -99,7 +99,7 @@ static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)    \
        "       bnz     1b              \n"                             \
        : "=&r"(orig), "=&r"(val)                                       \
        : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");        /* memory clobber comes from smp_mb() */        \
+       : "cc", "memory");                                              \
                                                                        \
        return orig;                                                    \
 }
index 9b9bdd3..06be89f 100644 (file)
@@ -13,7 +13,7 @@
 
 #ifdef CONFIG_ARC_CURR_IN_REG
 
-register struct task_struct *curr_arc asm("r25");
+register struct task_struct *curr_arc asm("gp");
 #define current (curr_arc)
 
 #else
index 5f4de05..a0d5ebe 100644 (file)
 
 #ifdef ARC_DW2_UNWIND_AS_CFI
 
-#define CFI_STARTPROC  .cfi_startproc
-#define CFI_ENDPROC    .cfi_endproc
-#define CFI_DEF_CFA    .cfi_def_cfa
-#define CFI_REGISTER   .cfi_register
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_UNDEFINED  .cfi_undefined
+#define CFI_STARTPROC          .cfi_startproc
+#define CFI_ENDPROC            .cfi_endproc
+#define CFI_DEF_CFA            .cfi_def_cfa
+#define CFI_DEF_CFA_OFFSET     .cfi_def_cfa_offset
+#define CFI_DEF_CFA_REGISTER   .cfi_def_cfa_register
+#define CFI_OFFSET             .cfi_offset
+#define CFI_REL_OFFSET         .cfi_rel_offset
+#define CFI_REGISTER           .cfi_register
+#define CFI_RESTORE            .cfi_restore
+#define CFI_UNDEFINED          .cfi_undefined
 
 #else
 
 #define CFI_IGNORE     #
 
-#define CFI_STARTPROC  CFI_IGNORE
-#define CFI_ENDPROC    CFI_IGNORE
-#define CFI_DEF_CFA    CFI_IGNORE
-#define CFI_REGISTER   CFI_IGNORE
-#define CFI_REL_OFFSET CFI_IGNORE
-#define CFI_UNDEFINED  CFI_IGNORE
+#define CFI_STARTPROC          CFI_IGNORE
+#define CFI_ENDPROC            CFI_IGNORE
+#define CFI_DEF_CFA            CFI_IGNORE
+#define CFI_DEF_CFA_OFFSET     CFI_IGNORE
+#define CFI_DEF_CFA_REGISTER   CFI_IGNORE
+#define CFI_OFFSET             CFI_IGNORE
+#define CFI_REL_OFFSET         CFI_IGNORE
+#define CFI_REGISTER           CFI_IGNORE
+#define CFI_RESTORE            CFI_IGNORE
+#define CFI_UNDEFINED          CFI_IGNORE
 
 #endif /* !ARC_DW2_UNWIND_AS_CFI */
 
index 0ff4c06..4d13320 100644 (file)
@@ -18,7 +18,6 @@
  *              |      orig_r0      |
  *              |      event/ECR    |
  *              |      bta          |
- *              |      user_r25     |
  *              |      gp           |
  *              |      fp           |
  *              |      sp           |
 /*------------------------------------------------------------------------*/
 .macro INTERRUPT_PROLOGUE
 
-       ; (A) Before jumping to Interrupt Vector, hardware micro-ops did following:
+       ; Before jumping to Interrupt Vector, hardware micro-ops did following:
        ;   1. SP auto-switched to kernel mode stack
        ;   2. STATUS32.Z flag set if in U mode at time of interrupt (U:1,K:0)
        ;   3. Auto save: (mandatory) Push PC and STAT32 on stack
        ;                 hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE
-       ;   4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
+       ;  4a. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
        ;
-       ; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair
+       ; Now
+       ;  4b. If Auto-save (optional) not enabled in hw, manually save them
+       ;   5. Manually save: r12,r30, sp,fp,gp, ACCL pair
+       ;
+       ; At the end, SP points to pt_regs
 
 #ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
        ; carve pt_regs on stack (case #3), PC/STAT32 already on stack
 .endm
 
 /*------------------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
 
-       ; (A) Before jumping to Exception Vector, hardware micro-ops did following:
+       ; Before jumping to Exception Vector, hardware micro-ops did following:
        ;   1. SP auto-switched to kernel mode stack
        ;   2. STATUS32.Z flag set if in U mode at time of exception (U:1,K:0)
        ;
-       ; (B) Manually save the complete reg file below
+       ; Now manually save rest of reg file
+       ; At the end, SP points to pt_regs
 
-       sub     sp, sp, SZ_PT_REGS      ; carve pt_regs
+       sub     sp, sp, SZ_PT_REGS      ; carve space for pt_regs
 
        ; _HARD saves r10 clobbered by _SOFT as scratch hence comes first
 
        ; OUTPUT: r10 has ECR expected by EV_Trap
 .endm
 
+.macro EXCEPTION_PROLOGUE
+
+       EXCEPTION_PROLOGUE_KEEP_AE      ; return ECR in r10
+
+       lr  r0, [efa]
+       mov r1, sp
+
+       FAKE_RET_FROM_EXCPN             ; clobbers r9
+.endm
+
 /*------------------------------------------------------------------------
  * This macro saves the registers manually which would normally be autosaved
  * by hardware on taken interrupts. It is used by
  */
 .macro __SAVE_REGFILE_SOFT
 
-       ST2     gp, fp, PT_r26          ; gp (r26), fp (r27)
-
-       st      r12, [sp, PT_sp + 4]
-       st      r30, [sp, PT_sp + 8]
+       st      fp,  [sp, PT_fp]        ; r27
+       st      r30, [sp, PT_r30]
+       st      r12, [sp, PT_r12]
+       st      r26, [sp, PT_r26]       ; gp
 
        ; Saving pt_regs->sp correctly requires some extra work due to the way
        ; Auto stack switch works
 
        ; ISA requires ADD.nz to have same dest and src reg operands
        mov.nz  r10, sp
-       add.nz  r10, r10, SZ_PT_REGS    ; K mode SP
+       add2.nz r10, r10, SZ_PT_REGS/4  ; K mode SP
 
        st      r10, [sp, PT_sp]        ; SP (pt_regs->sp)
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       st      r25, [sp, PT_user_r25]
-       GET_CURR_TASK_ON_CPU    r25
-#endif
-
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
        ST2     r58, r59, PT_r58
 #endif
 
        /* clobbers r10, r11 registers pair */
        DSP_SAVE_REGFILE_IRQ
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+       GET_CURR_TASK_ON_CPU    gp
+#endif
+
 .endm
 
 /*------------------------------------------------------------------------*/
 .macro __RESTORE_REGFILE_SOFT
 
-       LD2     gp, fp, PT_r26          ; gp (r26), fp (r27)
-
-       ld      r12, [sp, PT_r12]
+       ld      fp,  [sp, PT_fp]
        ld      r30, [sp, PT_r30]
+       ld      r12, [sp, PT_r12]
+       ld      r26, [sp, PT_r26]
 
        ; Restore SP (into AUX_USER_SP) only if returning to U mode
        ;  - for K mode, it will be implicitly restored as stack is unwound
        sr      r10, [AUX_USER_SP]
 1:
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       ld      r25, [sp, PT_user_r25]
-#endif
-
        /* clobbers r10, r11 registers pair */
        DSP_RESTORE_REGFILE_IRQ
 
 
        btst    r0, STATUS_U_BIT        ; Z flag set if K, used in restoring SP
 
-       ld      r10, [sp, PT_event + 4]
+       ld      r10, [sp, PT_bta]
        sr      r10, [erbta]
 
        LD2     r10, r11, PT_ret
 
 .macro FAKE_RET_FROM_EXCPN
        lr      r9, [status32]
-       bic     r9, r9, STATUS_AE_MASK
-       or      r9, r9, STATUS_IE_MASK
+       bclr    r9, r9, STATUS_AE_BIT
+       bset    r9, r9, STATUS_IE_BIT
        kflag   r9
 .endm
 
index 67ff06e..a0e760e 100644 (file)
  *
  * After this it is safe to call the "C" handlers
  *-------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
 
        /* Need at least 1 reg to code the early exception prologue */
        PROLOG_FREEUP_REG r9, @ex_saved_reg1
        /* ARC700 doesn't provide auto-stack switching */
        SWITCH_TO_KERNEL_STK
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       /* Treat r25 as scratch reg (save on stack) and load with "current" */
-       PUSH    r25
-       GET_CURR_TASK_ON_CPU   r25
-#else
-       sub     sp, sp, 4
-#endif
-
        st.a    r0, [sp, -8]    /* orig_r0 needed for syscall (skip ECR slot) */
        sub     sp, sp, 4       /* skip pt_regs->sp, already saved above */
 
        PUSHAX  erbta
 
        lr      r10, [ecr]
-       st      r10, [sp, PT_event]    /* EV_Trap expects r10 to have ECR */
+       st      r10, [sp, PT_event]
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+       /* gp already saved on stack: now load with "current" */
+       GET_CURR_TASK_ON_CPU   gp
+#endif
+       ; OUTPUT: r10 has ECR expected by EV_Trap
+.endm
+
+.macro EXCEPTION_PROLOGUE
+
+       EXCEPTION_PROLOGUE_KEEP_AE      ; return ECR in r10
+
+       lr  r0, [efa]
+       mov r1, sp
+
+       FAKE_RET_FROM_EXCPN             ; clobbers r9
 .endm
 
 /*--------------------------------------------------------------
        POP     gp
        RESTORE_R12_TO_R0
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       ld      r25, [sp, 12]
-#endif
        ld  sp, [sp] /* restore original sp */
-       /* orig_r0, ECR, user_r25 skipped automatically */
+       /* orig_r0, ECR skipped automatically */
 .endm
 
 /* Dummy ECR values for Interrupts */
 
        SWITCH_TO_KERNEL_STK
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       /* Treat r25 as scratch reg (save on stack) and load with "current" */
-       PUSH    r25
-       GET_CURR_TASK_ON_CPU   r25
-#else
-       sub     sp, sp, 4
-#endif
 
        PUSH    0x003\LVL\()abcd    /* Dummy ECR */
        sub     sp, sp, 8           /* skip orig_r0 (not needed)
        PUSHAX  lp_start
        PUSHAX  bta_l\LVL\()
 
+#ifdef CONFIG_ARC_CURR_IN_REG
+       /* gp already saved on stack: now load with "current" */
+       GET_CURR_TASK_ON_CPU   gp
+#endif
 .endm
 
 /*--------------------------------------------------------------
        POP     gp
        RESTORE_R12_TO_R0
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       ld      r25, [sp, 12]
-#endif
-       ld  sp, [sp] /* restore original sp */
-       /* orig_r0, ECR, user_r25 skipped automatically */
+       ld  sp, [sp] /* restore original sp; orig_r0, ECR skipped implicitly */
 .endm
 
 /* Get thread_info of "current" tsk */
index fcdd59d..49c2e09 100644 (file)
@@ -13,6 +13,8 @@
 #include <asm/processor.h>     /* For VMALLOC_START */
 #include <asm/mmu.h>
 
+#ifdef __ASSEMBLY__
+
 #ifdef CONFIG_ISA_ARCOMPACT
 #include <asm/entry-compact.h> /* ISA specific bits */
 #else
@@ -89,7 +91,7 @@
  * Helpers to save/restore callee-saved regs:
  * used by several macros below
  *-------------------------------------------------------------*/
-.macro SAVE_R13_TO_R24
+.macro SAVE_R13_TO_R25
        PUSH    r13
        PUSH    r14
        PUSH    r15
        PUSH    r22
        PUSH    r23
        PUSH    r24
+       PUSH    r25
 .endm
 
-.macro RESTORE_R24_TO_R13
+.macro RESTORE_R25_TO_R13
+       POP     r25
        POP     r24
        POP     r23
        POP     r22
        POP     r13
 .endm
 
-/*--------------------------------------------------------------
- * Collect User Mode callee regs as struct callee_regs - needed by
- * fork/do_signal/unaligned-access-emulation.
- * (By default only scratch regs are saved on entry to kernel)
- *
- * Special handling for r25 if used for caching Task Pointer.
- * It would have been saved in task->thread.user_r25 already, but to keep
- * the interface same it is copied into regular r25 placeholder in
- * struct callee_regs.
- *-------------------------------------------------------------*/
+/*
+ * save user mode callee regs as struct callee_regs
+ *  - needed by fork/do_signal/unaligned-access-emulation.
+ */
 .macro SAVE_CALLEE_SAVED_USER
+       SAVE_R13_TO_R25
+.endm
 
-       mov     r12, sp         ; save SP as ref to pt_regs
-       SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-       ; Retrieve orig r25 and save it with rest of callee_regs
-       ld      r12, [r12, PT_user_r25]
-       PUSH    r12
-#else
-       PUSH    r25
-#endif
-
+/*
+ * restore user mode callee regs as struct callee_regs
+ *  - could have been changed by ptrace tracer or unaligned-access fixup
+ */
+.macro RESTORE_CALLEE_SAVED_USER
+       RESTORE_R25_TO_R13
 .endm
 
-/*--------------------------------------------------------------
- * Save kernel Mode callee regs at the time of Contect Switch.
- *
- * Special handling for r25 if used for caching Task Pointer.
- * Kernel simply skips saving it since it will be loaded with
- * incoming task pointer anyways
- *-------------------------------------------------------------*/
+/*
+ * save/restore kernel mode callee regs at the time of context switch
+ */
 .macro SAVE_CALLEE_SAVED_KERNEL
-
-       SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-       sub     sp, sp, 4
-#else
-       PUSH    r25
-#endif
+       SAVE_R13_TO_R25
 .endm
 
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_KERNEL
- *-------------------------------------------------------------*/
 .macro RESTORE_CALLEE_SAVED_KERNEL
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-       add     sp, sp, 4  /* skip usual r25 placeholder */
-#else
-       POP     r25
-#endif
-       RESTORE_R24_TO_R13
-.endm
-
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_USER
- *
- * ptrace tracer or unaligned-access fixup might have changed a user mode
- * callee reg which is saved back to usual r25 storage location
- *-------------------------------------------------------------*/
-.macro RESTORE_CALLEE_SAVED_USER
-
-#ifdef CONFIG_ARC_CURR_IN_REG
-       POP     r12
-#else
-       POP     r25
-#endif
-       RESTORE_R24_TO_R13
-
-       ; SP is back to start of pt_regs
-#ifdef CONFIG_ARC_CURR_IN_REG
-       st      r12, [sp, PT_user_r25]
-#endif
+       RESTORE_R25_TO_R13
 .endm
 
 /*--------------------------------------------------------------
 
 #ifdef CONFIG_SMP
 
-/*-------------------------------------------------
+/*
  * Retrieve the current running task on this CPU
- * 1. Determine curr CPU id.
- * 2. Use it to index into _current_task[ ]
+ *  - loads it from backing _current_task[] (and can't use the
+ *    caching reg for current task)
  */
 .macro  GET_CURR_TASK_ON_CPU   reg
        GET_CPU_ID  \reg
        add2 \tmp, @_current_task, \tmp
        st   \tsk, [\tmp]
 #ifdef CONFIG_ARC_CURR_IN_REG
-       mov r25, \tsk
+       mov gp, \tsk
 #endif
 
 .endm
 .macro  SET_CURR_TASK_ON_CPU    tsk, tmp
        st  \tsk, [@_current_task]
 #ifdef CONFIG_ARC_CURR_IN_REG
-       mov r25, \tsk
+       mov gp, \tsk
 #endif
 .endm
 
 #endif /* SMP / UNI */
 
-/* ------------------------------------------------------------------
+/*
  * Get the ptr to some field of Current Task at @off in task struct
- *  -Uses r25 for Current task ptr if that is enabled
+ *  - Uses current task cached in reg if enabled
  */
-
 #ifdef CONFIG_ARC_CURR_IN_REG
 
 .macro GET_CURR_TASK_FIELD_PTR  off,  reg
-       add \reg, r25, \off
+       add \reg, gp, \off
 .endm
 
 #else
 
 #endif /* CONFIG_ARC_CURR_IN_REG */
 
+#else  /* !__ASSEMBLY__ */
+
+extern void do_signal(struct pt_regs *);
+extern void do_notify_resume(struct pt_regs *);
+extern int do_privilege_fault(unsigned long, struct pt_regs *);
+extern int do_extension_fault(unsigned long, struct pt_regs *);
+extern int insterror_is_error(unsigned long, struct pt_regs *);
+extern int do_memory_error(unsigned long, struct pt_regs *);
+extern int trap_is_brkpt(unsigned long, struct pt_regs *);
+extern int do_misaligned_error(unsigned long, struct pt_regs *);
+extern int do_trap5_error(unsigned long, struct pt_regs *);
+extern int do_misaligned_access(unsigned long, struct pt_regs *, struct callee_regs *);
+extern void do_machine_check_fault(unsigned long, struct pt_regs *);
+extern void do_non_swi_trap(unsigned long, struct pt_regs *);
+extern void do_insterror_or_kprobe(unsigned long, struct pt_regs *);
+extern void do_page_fault(unsigned long, struct pt_regs *);
+
+#endif
+
 #endif  /* __ASM_ARC_ENTRY_H */
index 0309cb4..c574712 100644 (file)
@@ -25,5 +25,6 @@
 #include <asm-generic/irq.h>
 
 extern void arc_init_IRQ(void);
+extern void arch_do_IRQ(unsigned int, struct pt_regs *);
 
 #endif
index ca427c3..9febf5b 100644 (file)
@@ -14,6 +14,8 @@ typedef struct {
        unsigned long asid[NR_CPUS];    /* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
+extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *);
+
 #endif
 
 #include <asm/mmu-arcv2.h>
index fb844fc..d606658 100644 (file)
@@ -22,7 +22,6 @@
  * struct thread_info
  */
 struct thread_struct {
-       unsigned long ksp;      /* kernel mode stack pointer */
        unsigned long callee_reg;       /* pointer to callee regs */
        unsigned long fault_address;    /* dbls as brkpt holder as well */
 #ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
@@ -33,9 +32,7 @@ struct thread_struct {
 #endif
 };
 
-#define INIT_THREAD  {                          \
-       .ksp = sizeof(init_stack) + (unsigned long) init_stack, \
-}
+#define INIT_THREAD  { }
 
 /* Forward declaration, a strange C thing */
 struct task_struct;
@@ -56,7 +53,7 @@ struct task_struct;
  * Where about of Task's sp, fp, blink when it was last seen in kernel mode.
  * Look in process.c for details of kernel stack layout
  */
-#define TSK_K_ESP(tsk)         (tsk->thread.ksp)
+#define TSK_K_ESP(tsk)         (task_thread_info(tsk)->ksp)
 
 #define TSK_K_REG(tsk, off)    (*((unsigned long *)(TSK_K_ESP(tsk) + \
                                        sizeof(struct callee_regs) + off)))
index 5869a74..4a2b30f 100644 (file)
 
 #ifndef __ASSEMBLY__
 
+typedef union {
+       struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned long state:8, vec:8, cause:8, param:8;
+#else
+               unsigned long param:8, cause:8, vec:8, state:8;
+#endif
+       };
+       unsigned long full;
+} ecr_reg;
+
 /* THE pt_regs: Defines how regs are saved during entry into kernel */
 
 #ifdef CONFIG_ISA_ARCOMPACT
@@ -40,23 +51,10 @@ struct pt_regs {
         *      Last word used by Linux for extra state mgmt (syscall-restart)
         * For interrupts, use artificial ECR values to note current prio-level
         */
-       union {
-               struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-                       unsigned long state:8, ecr_vec:8,
-                                     ecr_cause:8, ecr_param:8;
-#else
-                       unsigned long ecr_param:8, ecr_cause:8,
-                                     ecr_vec:8, state:8;
-#endif
-               };
-               unsigned long event;
-       };
-
-       unsigned long user_r25;
+       ecr_reg ecr;
 };
 
-#define MAX_REG_OFFSET offsetof(struct pt_regs, user_r25)
+#define MAX_REG_OFFSET offsetof(struct pt_regs, ecr)
 
 #else
 
@@ -64,28 +62,14 @@ struct pt_regs {
 
        unsigned long orig_r0;
 
-       union {
-               struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-                       unsigned long state:8, ecr_vec:8,
-                                     ecr_cause:8, ecr_param:8;
-#else
-                       unsigned long ecr_param:8, ecr_cause:8,
-                                     ecr_vec:8, state:8;
-#endif
-               };
-               unsigned long event;
-       };
-
-       unsigned long bta;      /* bta_l1, bta_l2, erbta */
+       ecr_reg ecr;            /* Exception Cause Reg */
 
-       unsigned long user_r25;
+       unsigned long bta;      /* erbta */
 
-       unsigned long r26;      /* gp */
        unsigned long fp;
-       unsigned long sp;       /* user/kernel sp depending on where we came from  */
-
-       unsigned long r12, r30;
+       unsigned long r30;
+       unsigned long r12;
+       unsigned long r26;      /* gp */
 
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
        unsigned long r58, r59; /* ACCL/ACCH used by FPU / DSP MPY */
@@ -94,6 +78,8 @@ struct pt_regs {
        unsigned long DSP_CTRL;
 #endif
 
+       unsigned long sp;       /* user/kernel sp depending on entry  */
+
        /*------- Below list auto saved by h/w -----------*/
        unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
 
@@ -134,13 +120,13 @@ struct callee_regs {
 /* return 1 if PC in delay slot */
 #define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK)
 
-#define in_syscall(regs)    ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param)
-#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param)
+#define in_syscall(regs)    ((regs->ecr.vec == ECR_V_TRAP) && !regs->ecr.param)
+#define in_brkpt_trap(regs) ((regs->ecr.vec == ECR_V_TRAP) && regs->ecr.param)
 
 #define STATE_SCALL_RESTARTED  0x01
 
-#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED)
-#define syscall_restartable(reg) !(reg->state &  STATE_SCALL_RESTARTED)
+#define syscall_wont_restart(regs) (regs->ecr.state |= STATE_SCALL_RESTARTED)
+#define syscall_restartable(regs) !(regs->ecr.state &  STATE_SCALL_RESTARTED)
 
 #define current_pt_regs()                                      \
 ({                                                             \
@@ -181,6 +167,9 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
        return *(unsigned long *)((unsigned long)regs + offset);
 }
 
+extern int syscall_trace_entry(struct pt_regs *);
+extern void syscall_trace_exit(struct pt_regs *);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PTRACE_H */
index 028a8cf..1c6db59 100644 (file)
@@ -35,11 +35,11 @@ long __init arc_get_mem_sz(void);
 #define IS_AVAIL3(v, v2, s)    IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
 
 extern void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_mmu_bcr(void);
+extern int arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
 
 extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
+extern int arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+
+extern void __init handle_uboot_args(void);
 
 #endif /* __ASMARC_SETUP_H */
index d856491..e0913f5 100644 (file)
@@ -29,6 +29,8 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 extern void __init smp_init_cpus(void);
 extern void first_lines_of_secondary(void);
 extern const char *arc_platform_smp_cpuinfo(void);
+extern void arc_platform_smp_wait_to_boot(int);
+extern void start_kernel_secondary(void);
 
 /*
  * API expected BY platform smp code (FROM arch smp code)
index 6ba7fe4..4c530cf 100644 (file)
  */
 struct thread_info {
        unsigned long flags;            /* low level flags */
+       unsigned long ksp;              /* kernel mode stack top in __switch_to */
        int preempt_count;              /* 0 => preemptable, <0 => BUG */
-       struct task_struct *task;       /* main task structure */
-       __u32 cpu;                      /* current CPU */
+       int cpu;                        /* current CPU */
        unsigned long thr_ptr;          /* TLS ptr */
+       struct task_struct *task;       /* main task structure */
 };
 
 /*
- * macros/functions for gaining access to the thread information structure
- *
- * preempt_count needs to be 1 initially, until the scheduler is functional.
+ * initialize thread_info for any @tsk
+ *  - this is not related to init_task per se
  */
 #define INIT_THREAD_INFO(tsk)                  \
 {                                              \
index 9971247..1e8809e 100644 (file)
@@ -146,8 +146,9 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n)
        if (n == 0)
                return 0;
 
-       /* unaligned */
-       if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+       /* fallback for unaligned access when hardware doesn't support */
+       if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+            (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
 
                unsigned char tmp;
 
@@ -373,8 +374,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
        if (n == 0)
                return 0;
 
-       /* unaligned */
-       if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+       /* fallback for unaligned access when hardware doesn't support */
+       if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+            (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
 
                unsigned char tmp;
 
@@ -584,7 +586,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
        return res;
 }
 
-static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
+static inline unsigned long __clear_user(void __user *to, unsigned long n)
 {
        long res = n;
        unsigned char *d_char = to;
@@ -626,17 +628,10 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
        return res;
 }
 
-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
 #define INLINE_COPY_TO_USER
 #define INLINE_COPY_FROM_USER
 
-#define __clear_user(d, n)             __arc_clear_user(d, n)
-#else
-extern unsigned long arc_clear_user_noinline(void __user *to,
-               unsigned long n);
-#define __clear_user(d, n)             arc_clear_user_noinline(d, n)
-#endif
+#define __clear_user                   __clear_user
 
 #include <asm-generic/uaccess.h>
 
index 0723d88..95fbf93 100644 (file)
@@ -5,6 +5,8 @@
 
 obj-y  := head.o arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
 obj-y  += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
+obj-y  += ctx_sw_asm.o
+
 obj-$(CONFIG_ISA_ARCOMPACT)            += entry-compact.o intc-compact.o
 obj-$(CONFIG_ISA_ARCV2)                        += entry-arcv2.o intc-arcv2.o
 
@@ -24,11 +26,4 @@ ifdef CONFIG_ISA_ARCOMPACT
 CFLAGS_fpu.o   += -mdpfp
 endif
 
-ifdef CONFIG_ARC_DW2_UNWIND
-CFLAGS_ctx_sw.o += -fno-omit-frame-pointer
-obj-y += ctx_sw.o
-else
-obj-y += ctx_sw_asm.o
-endif
-
 extra-y := vmlinux.lds
index 0e88403..f77deb7 100644 (file)
@@ -20,13 +20,13 @@ int main(void)
 
        BLANK();
 
-       DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
        DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg));
        DEFINE(THREAD_FAULT_ADDR,
               offsetof(struct thread_struct, fault_address));
 
        BLANK();
 
+       DEFINE(THREAD_INFO_KSP, offsetof(struct thread_info, ksp));
        DEFINE(THREAD_INFO_FLAGS, offsetof(struct thread_info, flags));
        DEFINE(THREAD_INFO_PREEMPT_COUNT,
               offsetof(struct thread_info, preempt_count));
@@ -46,7 +46,8 @@ int main(void)
        BLANK();
 
        DEFINE(PT_status32, offsetof(struct pt_regs, status32));
-       DEFINE(PT_event, offsetof(struct pt_regs, event));
+       DEFINE(PT_event, offsetof(struct pt_regs, ecr));
+       DEFINE(PT_bta, offsetof(struct pt_regs, bta));
        DEFINE(PT_sp, offsetof(struct pt_regs, sp));
        DEFINE(PT_r0, offsetof(struct pt_regs, r0));
        DEFINE(PT_r1, offsetof(struct pt_regs, r1));
@@ -61,13 +62,9 @@ int main(void)
        DEFINE(PT_r26, offsetof(struct pt_regs, r26));
        DEFINE(PT_ret, offsetof(struct pt_regs, ret));
        DEFINE(PT_blink, offsetof(struct pt_regs, blink));
+       OFFSET(PT_fp, pt_regs, fp);
        DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end));
        DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count));
-       DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
-
-       DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
-       DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
-
 #ifdef CONFIG_ISA_ARCV2
        OFFSET(PT_r12, pt_regs, r12);
        OFFSET(PT_r30, pt_regs, r30);
@@ -80,5 +77,8 @@ int main(void)
        OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL);
 #endif
 
+       DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
+       DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
+
        return 0;
 }
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
deleted file mode 100644 (file)
index 1a76f2d..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * Vineetg: Aug 2009
- *  -"C" version of lowest level context switch asm macro called by schedular
- *   gcc doesn't generate the dward CFI info for hand written asm, hence can't
- *   backtrace out of it (e.g. tasks sleeping in kernel).
- *   So we cheat a bit by writing almost similar code in inline-asm.
- *  -This is a hacky way of doing things, but there is no other simple way.
- *   I don't want/intend to extend unwinding code to understand raw asm
- */
-
-#include <asm/asm-offsets.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-
-#define KSP_WORD_OFF   ((TASK_THREAD + THREAD_KSP) / 4)
-
-struct task_struct *__sched
-__switch_to(struct task_struct *prev_task, struct task_struct *next_task)
-{
-       unsigned int tmp;
-       unsigned int prev = (unsigned int)prev_task;
-       unsigned int next = (unsigned int)next_task;
-
-       __asm__ __volatile__(
-               /* FP/BLINK save generated by gcc (standard function prologue */
-               "st.a    r13, [sp, -4]   \n\t"
-               "st.a    r14, [sp, -4]   \n\t"
-               "st.a    r15, [sp, -4]   \n\t"
-               "st.a    r16, [sp, -4]   \n\t"
-               "st.a    r17, [sp, -4]   \n\t"
-               "st.a    r18, [sp, -4]   \n\t"
-               "st.a    r19, [sp, -4]   \n\t"
-               "st.a    r20, [sp, -4]   \n\t"
-               "st.a    r21, [sp, -4]   \n\t"
-               "st.a    r22, [sp, -4]   \n\t"
-               "st.a    r23, [sp, -4]   \n\t"
-               "st.a    r24, [sp, -4]   \n\t"
-#ifndef CONFIG_ARC_CURR_IN_REG
-               "st.a    r25, [sp, -4]   \n\t"
-#else
-               "sub     sp, sp, 4      \n\t"   /* usual r25 placeholder */
-#endif
-
-               /* set ksp of outgoing task in tsk->thread.ksp */
-#if KSP_WORD_OFF <= 255
-               "st.as   sp, [%3, %1]    \n\t"
-#else
-               /*
-                * Workaround for NR_CPUS=4k
-                * %1 is bigger than 255 (S9 offset for st.as)
-                */
-               "add2    r24, %3, %1     \n\t"
-               "st      sp, [r24]       \n\t"
-#endif
-
-               /*
-                * setup _current_task with incoming tsk.
-                * optionally, set r25 to that as well
-                * For SMP extra work to get to &_current_task[cpu]
-                * (open coded SET_CURR_TASK_ON_CPU)
-                */
-#ifndef CONFIG_SMP
-               "st  %2, [@_current_task]       \n\t"
-#else
-               "lr   r24, [identity]           \n\t"
-               "lsr  r24, r24, 8               \n\t"
-               "bmsk r24, r24, 7               \n\t"
-               "add2 r24, @_current_task, r24  \n\t"
-               "st   %2,  [r24]                \n\t"
-#endif
-#ifdef CONFIG_ARC_CURR_IN_REG
-               "mov r25, %2   \n\t"
-#endif
-
-               /* get ksp of incoming task from tsk->thread.ksp */
-               "ld.as  sp, [%2, %1]   \n\t"
-
-               /* start loading it's CALLEE reg file */
-
-#ifndef CONFIG_ARC_CURR_IN_REG
-               "ld.ab   r25, [sp, 4]   \n\t"
-#else
-               "add    sp, sp, 4       \n\t"
-#endif
-               "ld.ab   r24, [sp, 4]   \n\t"
-               "ld.ab   r23, [sp, 4]   \n\t"
-               "ld.ab   r22, [sp, 4]   \n\t"
-               "ld.ab   r21, [sp, 4]   \n\t"
-               "ld.ab   r20, [sp, 4]   \n\t"
-               "ld.ab   r19, [sp, 4]   \n\t"
-               "ld.ab   r18, [sp, 4]   \n\t"
-               "ld.ab   r17, [sp, 4]   \n\t"
-               "ld.ab   r16, [sp, 4]   \n\t"
-               "ld.ab   r15, [sp, 4]   \n\t"
-               "ld.ab   r14, [sp, 4]   \n\t"
-               "ld.ab   r13, [sp, 4]   \n\t"
-
-               /* last (ret value) = prev : although for ARC it mov r0, r0 */
-               "mov     %0, %3        \n\t"
-
-               /* FP/BLINK restore generated by gcc (standard func epilogue */
-
-               : "=r"(tmp)
-               : "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
-               : "blink"
-       );
-
-       return (struct task_struct *)tmp;
-}
index 02c4614..48e1f21 100644 (file)
 #include <asm/entry.h>       /* For the SAVE_* macros */
 #include <asm/asm-offsets.h>
 
-#define KSP_WORD_OFF   ((TASK_THREAD + THREAD_KSP) / 4)
-
-;################### Low Level Context Switch ##########################
+; IN
+;  - r0: prev task (also current)
+;  - r1: next task
+; OUT
+;  - r0: prev task (so r0 not touched)
 
        .section .sched.text,"ax",@progbits
-       .align 4
-       .global __switch_to
-       .type   __switch_to, @function
-__switch_to:
-       CFI_STARTPROC
-
-       /* Save regs on kernel mode stack of task */
-       st.a    blink, [sp, -4]
-       st.a    fp, [sp, -4]
-       SAVE_CALLEE_SAVED_KERNEL
+ENTRY_CFI(__switch_to)
 
-       /* Save the now KSP in task->thread.ksp */
-#if KSP_WORD_OFF  <= 255
-       st.as  sp, [r0, KSP_WORD_OFF]
-#else
-       /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
-       add2    r24, r0, KSP_WORD_OFF
-       st      sp, [r24]
-#endif
-       /*
-       * Return last task in r0 (return reg)
-       * On ARC, Return reg = First Arg reg = r0.
-       * Since we already have last task in r0,
-       * don't need to do anything special to return it
-       */
+       /* save kernel stack frame regs of @prev task */
+       push    blink
+       CFI_DEF_CFA_OFFSET 4
+       CFI_OFFSET r31, -4
+
+       push    fp
+       CFI_DEF_CFA_OFFSET 8
+       CFI_OFFSET r27, -8
+
+       mov     fp, sp
+       CFI_DEF_CFA_REGISTER r27
+
+       /* kernel mode callee regs of @prev */
+       SAVE_CALLEE_SAVED_KERNEL
 
        /*
-        * switch to new task, contained in r1
-        * Temp reg r3 is required to get the ptr to store val
+        * save final SP to @prev->thread_info.ksp
+        * @prev is "current" so thread_info derived from SP
         */
-       SET_CURR_TASK_ON_CPU  r1, r3
+       GET_CURR_THR_INFO_FROM_SP  r10
+       st      sp,  [r10, THREAD_INFO_KSP]
+
+       /* update @next in _current_task[] and GP register caching it */
+       SET_CURR_TASK_ON_CPU  r1, r10
 
-       /* reload SP with kernel mode stack pointer in task->thread.ksp */
-       ld.as  sp, [r1, (TASK_THREAD + THREAD_KSP)/4]
+       /* load SP from @next->thread_info.ksp */
+       ld      r10, [r1, TASK_THREAD_INFO]
+       ld      sp,  [r10, THREAD_INFO_KSP]
 
-       /* restore the registers */
+       /* restore callee regs, stack frame regs of @next */
        RESTORE_CALLEE_SAVED_KERNEL
-       ld.ab   fp, [sp, 4]
-       ld.ab   blink, [sp, 4]
-       j       [blink]
 
+       pop     fp
+       CFI_RESTORE r27
+       CFI_DEF_CFA r28, 4
+
+       pop     blink
+       CFI_RESTORE r31
+       CFI_DEF_CFA_OFFSET 0
+
+       j      [blink]
 END_CFI(__switch_to)
index 721d465..4c9e614 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <asm/mach_desc.h>
+#include <asm/serial.h>
 
 #ifdef CONFIG_SERIAL_EARLYCON
 
index a7e6a21..2e49c81 100644 (file)
@@ -125,11 +125,6 @@ ENTRY(mem_service)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        bl  do_memory_error
        b   ret_from_exception
 END(mem_service)
@@ -138,11 +133,6 @@ ENTRY(EV_Misaligned)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]   ; Faulting Data address
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        SAVE_CALLEE_SAVED_USER
        mov r2, sp              ; callee_regs
 
@@ -163,11 +153,6 @@ ENTRY(EV_TLBProtV)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]   ; Faulting Data address
-       mov r1, sp      ; pt_regs
-
-       FAKE_RET_FROM_EXCPN
-
        mov blink, ret_from_exception
        b   do_page_fault
 
index 5cb0cd7..774c03c 100644 (file)
@@ -254,18 +254,7 @@ END(handle_interrupt_level1)
 
 ENTRY(EV_TLBProtV)
 
-       EXCEPTION_PROLOGUE
-
-       mov r2, r10     ; ECR set into r10 already
-       lr  r0, [efa]   ; Faulting Data address (not part of pt_regs saved above)
-
-       ; Exception auto-disables further Intr/exceptions.
-       ; Re-enable them by pretending to return from exception
-       ; (so rest of handler executes in pure K mode)
-
-       FAKE_RET_FROM_EXCPN
-
-       mov   r1, sp    ; Handle to pt_regs
+       EXCEPTION_PROLOGUE      ; ECR returned in r10
 
        ;------ (5) Type of Protection Violation? ----------
        ;
@@ -273,8 +262,7 @@ ENTRY(EV_TLBProtV)
        ;   -Access Violation   : 00_23_(00|01|02|03)_00
        ;                                x  r  w  r+w
        ;   -Unaligned Access   : 00_23_04_00
-       ;
-       bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+       bbit1 r10, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
 
        ;========= (6a) Access Violation Processing ========
        bl  do_page_fault
@@ -303,9 +291,6 @@ END(EV_TLBProtV)
 ENTRY(call_do_page_fault)
 
        EXCEPTION_PROLOGUE
-       lr  r0, [efa]   ; Faulting Data address
-       mov   r1, sp
-       FAKE_RET_FROM_EXCPN
 
        mov blink, ret_from_exception
        b  do_page_fault
index 54e91df..089f668 100644 (file)
@@ -80,11 +80,6 @@ ENTRY(instr_service)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        bl  do_insterror_or_kprobe
        b   ret_from_exception
 END(instr_service)
@@ -95,16 +90,15 @@ END(instr_service)
 
 ENTRY(EV_MachineCheck)
 
-       EXCEPTION_PROLOGUE
+       EXCEPTION_PROLOGUE_KEEP_AE      ; ECR returned in r10
 
-       lr  r2, [ecr]
        lr  r0, [efa]
        mov r1, sp
 
        ; MC excpetions disable MMU
        ARC_MMU_REENABLE r3
 
-       lsr     r3, r2, 8
+       lsr     r3, r10, 8
        bmsk    r3, r3, 7
        brne    r3, ECR_C_MCHK_DUP_TLB, 1f
 
@@ -129,11 +123,6 @@ ENTRY(EV_PrivilegeV)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        bl  do_privilege_fault
        b   ret_from_exception
 END(EV_PrivilegeV)
@@ -145,11 +134,6 @@ ENTRY(EV_Extension)
 
        EXCEPTION_PROLOGUE
 
-       lr  r0, [efa]
-       mov r1, sp
-
-       FAKE_RET_FROM_EXCPN
-
        bl  do_extension_fault
        b   ret_from_exception
 END(EV_Extension)
@@ -160,20 +144,19 @@ END(EV_Extension)
 ; syscall Tracing
 ; ---------------------------------------------
 tracesys:
-       ; save EFA in case tracer wants the PC of traced task
-       ; using ERET won't work since next-PC has already committed
+       ; safekeep EFA (r12) if syscall tracer wanted PC
+       ; for traps, ERET is pre-commit so points to next-PC
        GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r11
        st  r12, [r11, THREAD_FAULT_ADDR]       ; thread.fault_address
 
-       ; PRE Sys Call Ptrace hook
-       mov r0, sp                      ; pt_regs needed
-       bl  @syscall_trace_entry
+       ; PRE syscall trace hook
+       mov r0, sp                              ; pt_regs
+       bl  @syscall_trace_enter
 
        ; Tracing code now returns the syscall num (orig or modif)
        mov r8, r0
 
        ; Do the Sys Call as we normally would.
-       ; Validate the Sys Call number
        cmp     r8,  NR_syscalls - 1
        mov.hi  r0, -ENOSYS
        bhi     tracesys_exit
@@ -190,37 +173,36 @@ tracesys:
        ld  r6, [sp, PT_r6]
        ld  r7, [sp, PT_r7]
        ld.as   r9, [sys_call_table, r8]
-       jl      [r9]        ; Entry into Sys Call Handler
+       jl      [r9]
 
 tracesys_exit:
-       st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
+       st  r0, [sp, PT_r0]
 
-       ;POST Sys Call Ptrace Hook
+       ; POST syscall trace hook
        mov r0, sp              ; pt_regs needed
        bl  @syscall_trace_exit
-       b   ret_from_exception ; NOT ret_from_system_call at is saves r0 which
-       ; we'd done before calling post hook above
+
+       ; don't call ret_from_system_call as it saves r0, already done above
+       b   ret_from_exception
 
 ; ---------------------------------------------
 ; Breakpoint TRAP
 ; ---------------------------------------------
 trap_with_param:
        mov r0, r12     ; EFA in case ptracer/gdb wants stop_pc
-       mov r1, sp
+       mov r1, sp      ; pt_regs
 
-       ; Save callee regs in case gdb wants to have a look
-       ; SP will grow up by size of CALLEE Reg-File
-       ; NOTE: clobbers r12
+       ; save callee regs in case tracer/gdb wants to peek
        SAVE_CALLEE_SAVED_USER
 
-       ; save location of saved Callee Regs @ thread_struct->pc
+       ; safekeep ref to callee regs
        GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
        st  sp, [r10, THREAD_CALLEE_REG]
 
-       ; Call the trap handler
+       ; call the non syscall trap handler
        bl  do_non_swi_trap
 
-       ; unwind stack to discard Callee saved Regs
+       ; unwind stack to discard callee regs
        DISCARD_CALLEE_SAVED_USER
 
        b   ret_from_exception
@@ -232,37 +214,33 @@ trap_with_param:
 
 ENTRY(EV_Trap)
 
-       EXCEPTION_PROLOGUE
+       EXCEPTION_PROLOGUE_KEEP_AE
 
        lr  r12, [efa]
 
        FAKE_RET_FROM_EXCPN
 
-       ;============ TRAP 1   :breakpoints
-       ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR)
+       ;============ TRAP N : breakpoints, kprobes etc
        bmsk.f 0, r10, 7
        bnz    trap_with_param
 
-       ;============ TRAP  (no param): syscall top level
+       ;============ TRAP 0 (no param): syscall
 
-       ; If syscall tracing ongoing, invoke pre-post-hooks
+       ; syscall tracing ongoing, invoke pre-post-hooks around syscall
        GET_CURR_THR_INFO_FLAGS   r10
        and.f 0, r10, _TIF_SYSCALL_WORK
        bnz   tracesys  ; this never comes back
 
        ;============ Normal syscall case
 
-       ; syscall num shd not exceed the total system calls avail
        cmp     r8,  NR_syscalls - 1
        mov.hi  r0, -ENOSYS
        bhi     .Lret_from_system_call
 
-       ; Offset into the syscall_table and call handler
        ld.as   r9,[sys_call_table, r8]
-       jl      [r9]        ; Entry into Sys Call Handler
+       jl      [r9]
 
 .Lret_from_system_call:
-
        st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
 
        ; fall through to ret_from_exception
@@ -318,7 +296,7 @@ resume_user_mode_begin:
        ;      tracer might call PEEKUSR(CALLEE reg)
        ;
        ; NOTE: SP will grow up by size of CALLEE Reg-File
-       SAVE_CALLEE_SAVED_USER          ; clobbers r12
+       SAVE_CALLEE_SAVED_USER
 
        ; save location of saved Callee Regs @ thread_struct->callee
        GET_CURR_TASK_FIELD_PTR   TASK_THREAD, r10
index 5cda19d..6788987 100644 (file)
@@ -108,7 +108,7 @@ static void arcv2_irq_unmask(struct irq_data *data)
        write_aux_reg(AUX_IRQ_ENABLE, 1);
 }
 
-void arcv2_irq_enable(struct irq_data *data)
+static void arcv2_irq_enable(struct irq_data *data)
 {
        /* set default priority */
        write_aux_reg(AUX_IRQ_SELECT, data->hwirq);
index 345a000..4f2b595 100644 (file)
@@ -175,7 +175,7 @@ void kgdb_trap(struct pt_regs *regs)
         * with trap_s 4 (compiled) breakpoints, continuation needs to
         * start after the breakpoint.
         */
-       if (regs->ecr_param == 3)
+       if (regs->ecr.param == 3)
                instruction_pointer(regs) -= BREAK_INSTR_SIZE;
 
        kgdb_handle_exception(1, SIGTRAP, 0, regs);
index f9fdb55..55373ca 100644 (file)
@@ -165,8 +165,6 @@ static void mcip_probe_n_setup(void)
                IS_AVAIL1(mp.idu, "IDU "),
                IS_AVAIL1(mp.dbg, "DEBUG "),
                IS_AVAIL1(mp.gfrc, "GFRC"));
-
-       cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
 }
 
 struct plat_smp_ops plat_smp_ops = {
index 980b71d..186ceab 100644 (file)
@@ -141,7 +141,7 @@ asmlinkage void ret_from_fork(void);
  * |    unused      |
  * |                |
  * ------------------
- * |     r25        |   <==== top of Stack (thread.ksp)
+ * |     r25        |   <==== top of Stack (thread_info.ksp)
  * ~                ~
  * |    --to--      |   (CALLEE Regs of kernel mode)
  * |     r13        |
@@ -162,7 +162,6 @@ asmlinkage void ret_from_fork(void);
  * |      SP        |
  * |    orig_r0     |
  * |    event/ECR   |
- * |    user_r25    |
  * ------------------  <===== END of PAGE
  */
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
@@ -182,14 +181,14 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
        c_callee = ((struct callee_regs *)childksp) - 1;
 
        /*
-        * __switch_to() uses thread.ksp to start unwinding stack
+        * __switch_to() uses thread_info.ksp to start unwinding stack
         * For kernel threads we don't need to create callee regs, the
         * stack layout nevertheless needs to remain the same.
         * Also, since __switch_to anyways unwinds callee regs, we use
         * this to populate kernel thread entry-pt/args into callee regs,
         * so that ret_from_kernel_thread() becomes simpler.
         */
-       p->thread.ksp = (unsigned long)c_callee;        /* THREAD_KSP */
+       task_thread_info(p)->ksp = (unsigned long)c_callee;     /* THREAD_INFO_KSP */
 
        /* __switch_to expects FP(0), BLINK(return addr) at top */
        childksp[0] = 0;                        /* fp */
@@ -243,16 +242,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
         */
        c_callee->r25 = task_thread_info(p)->thr_ptr;
 
-#ifdef CONFIG_ARC_CURR_IN_REG
-       /*
-        * setup usermode thread pointer #2:
-        * however for this special use of r25 in kernel, __switch_to() sets
-        * r25 for kernel needs and only in the final return path is usermode
-        * r25 setup, from pt_regs->user_r25. So set that up as well
-        */
-       c_regs->user_r25 = c_callee->r25;
-#endif
-
        return 0;
 }
 
index 2abdcd9..e0c233c 100644 (file)
@@ -46,8 +46,7 @@ static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_NAME(r0),
        REG_OFFSET_NAME(sp),
        REG_OFFSET_NAME(orig_r0),
-       REG_OFFSET_NAME(event),
-       REG_OFFSET_NAME(user_r25),
+       REG_OFFSET_NAME(ecr),
        REG_OFFSET_END,
 };
 
@@ -55,9 +54,8 @@ static const struct pt_regs_offset regoffset_table[] = {
 
 static const struct pt_regs_offset regoffset_table[] = {
        REG_OFFSET_NAME(orig_r0),
-       REG_OFFSET_NAME(event),
+       REG_OFFSET_NAME(ecr),
        REG_OFFSET_NAME(bta),
-       REG_OFFSET_NAME(user_r25),
        REG_OFFSET_NAME(r26),
        REG_OFFSET_NAME(fp),
        REG_OFFSET_NAME(sp),
@@ -341,7 +339,7 @@ long arch_ptrace(struct task_struct *child, long request,
        return ret;
 }
 
-asmlinkage int syscall_trace_entry(struct pt_regs *regs)
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 {
        if (test_thread_flag(TIF_SYSCALL_TRACE))
                if (ptrace_report_syscall_entry(regs))
index 41f07b3..4dcf858 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/mach_desc.h>
 #include <asm/smp.h>
 #include <asm/dsp-impl.h>
+#include <soc/arc/mcip.h>
 
 #define FIX_PTR(x)  __asm__ __volatile__(";" : "+r"(x))
 
@@ -43,19 +44,22 @@ const struct machine_desc *machine_desc;
 
 struct task_struct *_current_task[NR_CPUS];    /* For stack switching */
 
-struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
+struct cpuinfo_arc {
+       int arcver;
+       unsigned int t0:1, t1:1;
+       struct {
+               unsigned long base;
+               unsigned int sz;
+       } iccm, dccm;
+};
+
+#ifdef CONFIG_ISA_ARCV2
 
-static const struct id_to_str arc_legacy_rel[] = {
+static const struct id_to_str arc_hs_rel[] = {
        /* ID.ARCVER,   Release */
-#ifdef CONFIG_ISA_ARCOMPACT
-       { 0x34,         "R4.10"},
-       { 0x35,         "R4.11"},
-#else
        { 0x51,         "R2.0" },
        { 0x52,         "R2.1" },
        { 0x53,         "R3.0" },
-#endif
-       { 0x00,         NULL   }
 };
 
 static const struct id_to_str arc_hs_ver54_rel[] = {
@@ -66,323 +70,296 @@ static const struct id_to_str arc_hs_ver54_rel[] = {
        {  3,           "R4.00a"},
        {  0xFF,        NULL   }
 };
+#endif
 
-static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+static int
+arcompact_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
 {
-       if (is_isa_arcompact()) {
-               struct bcr_iccm_arcompact iccm;
-               struct bcr_dccm_arcompact dccm;
+       int n = 0;
+#ifdef CONFIG_ISA_ARCOMPACT
+       char *cpu_nm, *isa_nm = "ARCompact";
+       struct bcr_fp_arcompact fpu_sp, fpu_dp;
+       int atomic = 0, be, present;
+       int bpu_full, bpu_cache, bpu_pred;
+       struct bcr_bpu_arcompact bpu;
+       struct bcr_iccm_arcompact iccm;
+       struct bcr_dccm_arcompact dccm;
+       struct bcr_generic isa;
 
-               READ_BCR(ARC_REG_ICCM_BUILD, iccm);
-               if (iccm.ver) {
-                       cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */
-                       cpu->iccm.base_addr = iccm.base << 16;
-               }
+       READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
 
-               READ_BCR(ARC_REG_DCCM_BUILD, dccm);
-               if (dccm.ver) {
-                       unsigned long base;
-                       cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */
+       if (!isa.ver)   /* ISA BCR absent, use Kconfig info */
+               atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+       else {
+               /* ARC700_BUILD only has 2 bits of isa info */
+               atomic = isa.info & 1;
+       }
 
-                       base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
-                       cpu->dccm.base_addr = base & ~0xF;
-               }
-       } else {
-               struct bcr_iccm_arcv2 iccm;
-               struct bcr_dccm_arcv2 dccm;
-               unsigned long region;
-
-               READ_BCR(ARC_REG_ICCM_BUILD, iccm);
-               if (iccm.ver) {
-                       cpu->iccm.sz = 256 << iccm.sz00;        /* 512B to 16M */
-                       if (iccm.sz00 == 0xF && iccm.sz01 > 0)
-                               cpu->iccm.sz <<= iccm.sz01;
-
-                       region = read_aux_reg(ARC_REG_AUX_ICCM);
-                       cpu->iccm.base_addr = region & 0xF0000000;
-               }
+       be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
 
-               READ_BCR(ARC_REG_DCCM_BUILD, dccm);
-               if (dccm.ver) {
-                       cpu->dccm.sz = 256 << dccm.sz0;
-                       if (dccm.sz0 == 0xF && dccm.sz1 > 0)
-                               cpu->dccm.sz <<= dccm.sz1;
+       if (info->arcver < 0x34)
+               cpu_nm = "ARC750";
+       else
+               cpu_nm = "ARC770";
 
-                       region = read_aux_reg(ARC_REG_AUX_DCCM);
-                       cpu->dccm.base_addr = region & 0xF0000000;
-               }
-       }
-}
+       n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s%s%s\n",
+                      c, cpu_nm, isa_nm,
+                      IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+                      IS_AVAIL1(be, "[Big-Endian]"));
 
-static void decode_arc_core(struct cpuinfo_arc *cpu)
-{
-       struct bcr_uarch_build_arcv2 uarch;
-       const struct id_to_str *tbl;
-
-       if (cpu->core.family < 0x54) { /* includes arc700 */
+       READ_BCR(ARC_REG_FP_BCR, fpu_sp);
+       READ_BCR(ARC_REG_DPFP_BCR, fpu_dp);
 
-               for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) {
-                       if (cpu->core.family == tbl->id) {
-                               cpu->release = tbl->str;
-                               break;
-                       }
-               }
+       if (fpu_sp.ver | fpu_dp.ver)
+               n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
+                              IS_AVAIL1(fpu_sp.ver, "SP "),
+                              IS_AVAIL1(fpu_dp.ver, "DP "));
 
-               if (is_isa_arcompact())
-                       cpu->name = "ARC700";
-               else if (tbl->str)
-                       cpu->name = "HS38";
-               else
-                       cpu->name = cpu->release = "Unknown";
+       READ_BCR(ARC_REG_BPU_BCR, bpu);
+       bpu_full = bpu.fam ? 1 : 0;
+       bpu_cache = 256 << (bpu.ent - 1);
+       bpu_pred = 256 << (bpu.ent - 1);
 
-               return;
+       n += scnprintf(buf + n, len - n,
+                       "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+                       IS_AVAIL1(bpu_full, "full"),
+                       IS_AVAIL1(!bpu_full, "partial"),
+                       bpu_cache, bpu_pred);
+
+       READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+       if (iccm.ver) {
+               info->iccm.sz = 4096 << iccm.sz;        /* 8K to 512K */
+               info->iccm.base = iccm.base << 16;
        }
 
-       /*
-        * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
-        * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
-        * releases only update it.
-        */
-       READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
-
-       if (uarch.prod == 4) {
-               cpu->name = "HS48";
-               cpu->extn.dual = 1;
+       READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+       if (dccm.ver) {
+               unsigned long base;
+               info->dccm.sz = 2048 << dccm.sz;        /* 2K to 256K */
 
-       } else {
-               cpu->name = "HS38";
+               base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+               info->dccm.base = base & ~0xF;
        }
 
-       for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
-               if (uarch.maj == tbl->id) {
-                       cpu->release = tbl->str;
-                       break;
-               }
-       }
+       /* ARCompact ISA specific sanity checks */
+       present = fpu_dp.ver;   /* SP has no arch visible regs */
+       CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
+#endif
+       return n;
+
 }
 
-static void read_arc_build_cfg_regs(void)
+static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
 {
-       struct bcr_timer timer;
-       struct bcr_generic bcr;
-       struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+       int n = 0;
+#ifdef CONFIG_ISA_ARCV2
+       const char *release, *cpu_nm, *isa_nm = "ARCv2";
+       int dual_issue = 0, dual_enb = 0, mpy_opt, present;
+       int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk;
+       char mpy_nm[16], lpb_nm[32];
        struct bcr_isa_arcv2 isa;
-       struct bcr_actionpoint ap;
-
-       FIX_PTR(cpu);
+       struct bcr_mpy mpy;
+       struct bcr_fp_arcv2 fpu;
+       struct bcr_bpu_arcv2 bpu;
+       struct bcr_lpb lpb;
+       struct bcr_iccm_arcv2 iccm;
+       struct bcr_dccm_arcv2 dccm;
+       struct bcr_erp erp;
 
-       READ_BCR(AUX_IDENTITY, cpu->core);
-       decode_arc_core(cpu);
-
-       READ_BCR(ARC_REG_TIMERS_BCR, timer);
-       cpu->extn.timer0 = timer.t0;
-       cpu->extn.timer1 = timer.t1;
-       cpu->extn.rtc = timer.rtc;
-
-       cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
+       /*
+        * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
+        * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
+        * releases only update it.
+        */
 
-       READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
+       cpu_nm = "HS38";
 
-       /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
-       read_decode_ccm_bcr(cpu);
+       if (info->arcver > 0x50 && info->arcver <= 0x53) {
+               release = arc_hs_rel[info->arcver - 0x51].str;
+       } else {
+               const struct id_to_str *tbl;
+               struct bcr_uarch_build uarch;
 
-       read_decode_mmu_bcr();
-       read_decode_cache_bcr();
+               READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
 
-       if (is_isa_arcompact()) {
-               struct bcr_fp_arcompact sp, dp;
-               struct bcr_bpu_arcompact bpu;
-
-               READ_BCR(ARC_REG_FP_BCR, sp);
-               READ_BCR(ARC_REG_DPFP_BCR, dp);
-               cpu->extn.fpu_sp = sp.ver ? 1 : 0;
-               cpu->extn.fpu_dp = dp.ver ? 1 : 0;
-
-               READ_BCR(ARC_REG_BPU_BCR, bpu);
-               cpu->bpu.ver = bpu.ver;
-               cpu->bpu.full = bpu.fam ? 1 : 0;
-               if (bpu.ent) {
-                       cpu->bpu.num_cache = 256 << (bpu.ent - 1);
-                       cpu->bpu.num_pred = 256 << (bpu.ent - 1);
+               for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
+                       if (uarch.maj == tbl->id) {
+                               release = tbl->str;
+                               break;
+                       }
                }
-       } else {
-               struct bcr_fp_arcv2 spdp;
-               struct bcr_bpu_arcv2 bpu;
-
-               READ_BCR(ARC_REG_FP_V2_BCR, spdp);
-               cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
-               cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
-
-               READ_BCR(ARC_REG_BPU_BCR, bpu);
-               cpu->bpu.ver = bpu.ver;
-               cpu->bpu.full = bpu.ft;
-               cpu->bpu.num_cache = 256 << bpu.bce;
-               cpu->bpu.num_pred = 2048 << bpu.pte;
-               cpu->bpu.ret_stk = 4 << bpu.rse;
-
-               /* if dual issue hardware, is it enabled ? */
-               if (cpu->extn.dual) {
+               if (uarch.prod == 4) {
                        unsigned int exec_ctrl;
 
+                       cpu_nm = "HS48";
+                       dual_issue = 1;
+                       /* if dual issue hardware, is it enabled ? */
                        READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-                       cpu->extn.dual_enb = !(exec_ctrl & 1);
+                       dual_enb = !(exec_ctrl & 1);
                }
        }
 
-       READ_BCR(ARC_REG_AP_BCR, ap);
-       if (ap.ver) {
-               cpu->extn.ap_num = 2 << ap.num;
-               cpu->extn.ap_full = !ap.min;
-       }
-
-       READ_BCR(ARC_REG_SMART_BCR, bcr);
-       cpu->extn.smart = bcr.ver ? 1 : 0;
-
-       READ_BCR(ARC_REG_RTT_BCR, bcr);
-       cpu->extn.rtt = bcr.ver ? 1 : 0;
-
        READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
 
-       /* some hacks for lack of feature BCR info in old ARC700 cores */
-       if (is_isa_arcompact()) {
-               if (!isa.ver)   /* ISA BCR absent, use Kconfig info */
-                       cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
-               else {
-                       /* ARC700_BUILD only has 2 bits of isa info */
-                       struct bcr_generic bcr = *(struct bcr_generic *)&isa;
-                       cpu->isa.atomic = bcr.info & 1;
-               }
-
-               cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+       n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
+                      c, cpu_nm, release, isa_nm,
+                      IS_AVAIL1(isa.be, "[Big-Endian]"),
+                      IS_AVAIL3(dual_issue, dual_enb, " Dual-Issue "));
+
+       READ_BCR(ARC_REG_MPY_BCR, mpy);
+       mpy_opt = 2;    /* stock MPY/MPYH */
+       if (mpy.dsp)    /* OPT 7-9 */
+               mpy_opt = mpy.dsp + 6;
+
+       scnprintf(mpy_nm, 16, "mpy[opt %d] ", mpy_opt);
+
+       READ_BCR(ARC_REG_FP_V2_BCR, fpu);
+
+       n += scnprintf(buf + n, len - n, "ISA Extn\t: %s%s%s%s%s%s%s%s%s%s%s\n",
+                      IS_AVAIL2(isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+                      IS_AVAIL2(isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+                      IS_AVAIL2(isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
+                      IS_AVAIL1(mpy.ver, mpy_nm),
+                      IS_AVAIL1(isa.div_rem, "div_rem "),
+                      IS_AVAIL1((fpu.sp | fpu.dp), "  FPU:"),
+                      IS_AVAIL1(fpu.sp, " sp"),
+                      IS_AVAIL1(fpu.dp, " dp"));
+
+       READ_BCR(ARC_REG_BPU_BCR, bpu);
+       bpu_full = bpu.ft;
+       bpu_cache = 256 << bpu.bce;
+       bpu_pred = 2048 << bpu.pte;
+       bpu_ret_stk = 4 << bpu.rse;
+
+       READ_BCR(ARC_REG_LPB_BUILD, lpb);
+       if (lpb.ver) {
+               unsigned int ctl;
+               ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+               scnprintf(lpb_nm, sizeof(lpb_nm), " Loop Buffer:%d %s",
+                         lpb.entries, IS_DISABLED_RUN(!ctl));
+       }
 
-                /* there's no direct way to distinguish 750 vs. 770 */
-               if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3))
-                       cpu->name = "ARC750";
-       } else {
-               cpu->isa = isa;
+       n += scnprintf(buf + n, len - n,
+                       "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d%s\n",
+                       IS_AVAIL1(bpu_full, "full"),
+                       IS_AVAIL1(!bpu_full, "partial"),
+                       bpu_cache, bpu_pred, bpu_ret_stk,
+                       lpb_nm);
+
+       READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+       if (iccm.ver) {
+               unsigned long base;
+               info->iccm.sz = 256 << iccm.sz00;       /* 512B to 16M */
+               if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+                       info->iccm.sz <<= iccm.sz01;
+               base = read_aux_reg(ARC_REG_AUX_ICCM);
+               info->iccm.base = base & 0xF0000000;
        }
-}
 
-static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
-{
-       struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
-       struct bcr_identity *core = &cpu->core;
-       char mpy_opt[16];
-       int n = 0;
+       READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+       if (dccm.ver) {
+               unsigned long base;
+               info->dccm.sz = 256 << dccm.sz0;
+               if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+                       info->dccm.sz <<= dccm.sz1;
+               base = read_aux_reg(ARC_REG_AUX_DCCM);
+               info->dccm.base = base & 0xF0000000;
+       }
 
-       FIX_PTR(cpu);
+       /* Error Protection: ECC/Parity */
+       READ_BCR(ARC_REG_ERP_BUILD, erp);
+       if (erp.ver) {
+               struct ctl_erp ctl;
+               READ_BCR(ARC_REG_ERP_CTRL, ctl);
+               /* inverted bits: 0 means enabled */
+               n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+                               IS_AVAIL3(erp.ic,  !ctl.dpi, "IC "),
+                               IS_AVAIL3(erp.dc,  !ctl.dpd, "DC "),
+                               IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+       }
 
-       n += scnprintf(buf + n, len - n,
-                      "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
-                      core->family, core->cpu_id, core->chip_id);
+       /* ARCv2 ISA specific sanity checks */
+       present = fpu.sp | fpu.dp | mpy.dsp;    /* DSP and/or FPU */
+       CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
 
-       n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
-                      cpu_id, cpu->name, cpu->release,
-                      is_isa_arcompact() ? "ARCompact" : "ARCv2",
-                      IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
-                      IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
+       dsp_config_check();
+#endif
+       return n;
+}
 
-       n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
-                      IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
-                      IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
-                      IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
-                      IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT));
+static char *arc_cpu_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
+{
+       struct bcr_identity ident;
+       struct bcr_timer timer;
+       struct bcr_generic bcr;
+       struct mcip_bcr mp;
+       struct bcr_actionpoint ap;
+       unsigned long vec_base;
+       int ap_num, ap_full, smart, rtt, n;
 
-       if (cpu->extn_mpy.ver) {
-               if (is_isa_arcompact()) {
-                       scnprintf(mpy_opt, 16, "mpy");
-               } else {
+       memset(info, 0, sizeof(struct cpuinfo_arc));
 
-                       int opt = 2;    /* stock MPY/MPYH */
+       READ_BCR(AUX_IDENTITY, ident);
+       info->arcver = ident.family;
 
-                       if (cpu->extn_mpy.dsp)  /* OPT 7-9 */
-                               opt = cpu->extn_mpy.dsp + 6;
+       n = scnprintf(buf, len,
+                      "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
+                      ident.family, ident.cpu_id, ident.chip_id);
 
-                       scnprintf(mpy_opt, 16, "mpy[opt %d] ", opt);
-               }
+       if (is_isa_arcompact()) {
+               n += arcompact_mumbojumbo(c, info, buf + n, len - n);
+       } else if (is_isa_arcv2()){
+               n += arcv2_mumbojumbo(c, info, buf + n, len - n);
        }
 
-       n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
-                      IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
-                      IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
-                      IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
-                      IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt),
-                      IS_AVAIL1(cpu->isa.div_rem, "div_rem "));
+       n += arc_mmu_mumbojumbo(c, buf + n, len - n);
+       n += arc_cache_mumbojumbo(c, buf + n, len - n);
 
-       if (cpu->bpu.ver) {
-               n += scnprintf(buf + n, len - n,
-                             "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
-                             IS_AVAIL1(cpu->bpu.full, "full"),
-                             IS_AVAIL1(!cpu->bpu.full, "partial"),
-                             cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
-
-               if (is_isa_arcv2()) {
-                       struct bcr_lpb lpb;
-
-                       READ_BCR(ARC_REG_LPB_BUILD, lpb);
-                       if (lpb.ver) {
-                               unsigned int ctl;
-                               ctl = read_aux_reg(ARC_REG_LPB_CTRL);
-
-                               n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
-                                              lpb.entries,
-                                              IS_DISABLED_RUN(!ctl));
-                       }
-               }
-               n += scnprintf(buf + n, len - n, "\n");
-       }
+       READ_BCR(ARC_REG_TIMERS_BCR, timer);
+       info->t0 = timer.t0;
+       info->t1 = timer.t1;
 
-       return buf;
-}
+       READ_BCR(ARC_REG_MCIP_BCR, mp);
+       vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
 
-static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
-{
-       int n = 0;
-       struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
+       n += scnprintf(buf + n, len - n,
+                      "Timers\t\t: %s%s%s%s%s%s\nVector Table\t: %#lx\n",
+                      IS_AVAIL1(timer.t0, "Timer0 "),
+                      IS_AVAIL1(timer.t1, "Timer1 "),
+                      IS_AVAIL2(timer.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+                      IS_AVAIL2(mp.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+                      vec_base);
 
-       FIX_PTR(cpu);
+       READ_BCR(ARC_REG_AP_BCR, ap);
+       if (ap.ver) {
+               ap_num = 2 << ap.num;
+               ap_full = !ap.min;
+       }
 
-       n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base);
+       READ_BCR(ARC_REG_SMART_BCR, bcr);
+       smart = bcr.ver ? 1 : 0;
 
-       if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
-               n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
-                              IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
-                              IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
+       READ_BCR(ARC_REG_RTT_BCR, bcr);
+       rtt = bcr.ver ? 1 : 0;
 
-       if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
+       if (ap.ver | smart | rtt) {
                n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
-                              IS_AVAIL1(cpu->extn.smart, "smaRT "),
-                              IS_AVAIL1(cpu->extn.rtt, "RTT "));
-               if (cpu->extn.ap_num) {
+                              IS_AVAIL1(smart, "smaRT "),
+                              IS_AVAIL1(rtt, "RTT "));
+               if (ap.ver) {
                        n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
-                                      cpu->extn.ap_num,
-                                      cpu->extn.ap_full ? "full":"min");
+                                      ap_num,
+                                      ap_full ? "full":"min");
                }
                n += scnprintf(buf + n, len - n, "\n");
        }
 
-       if (cpu->dccm.sz || cpu->iccm.sz)
-               n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
-                              cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
-                              cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
-
-       if (is_isa_arcv2()) {
-
-               /* Error Protection: ECC/Parity */
-               struct bcr_erp erp;
-               READ_BCR(ARC_REG_ERP_BUILD, erp);
-
-               if (erp.ver) {
-                       struct  ctl_erp ctl;
-                       READ_BCR(ARC_REG_ERP_CTRL, ctl);
-
-                       /* inverted bits: 0 means enabled */
-                       n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
-                               IS_AVAIL3(erp.ic,  !ctl.dpi, "IC "),
-                               IS_AVAIL3(erp.dc,  !ctl.dpd, "DC "),
-                               IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
-               }
-       }
+       if (info->dccm.sz || info->iccm.sz)
+               n += scnprintf(buf + n, len - n,
+                              "Extn [CCM]\t: DCCM @ %lx, %d KB / ICCM: @ %lx, %d KB\n",
+                              info->dccm.base, TO_KB(info->dccm.sz),
+                              info->iccm.base, TO_KB(info->iccm.sz));
 
        return buf;
 }
@@ -401,15 +378,15 @@ void chk_opt_weak(char *opt_name, bool hw_exists, bool opt_ena)
                panic("Disable %s, hardware NOT present\n", opt_name);
 }
 
-static void arc_chk_core_config(void)
+/*
+ * ISA agnostic sanity checks
+ */
+static void arc_chk_core_config(struct cpuinfo_arc *info)
 {
-       struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
-       int present = 0;
-
-       if (!cpu->extn.timer0)
+       if (!info->t0)
                panic("Timer0 is not present!\n");
 
-       if (!cpu->extn.timer1)
+       if (!info->t1)
                panic("Timer1 is not present!\n");
 
 #ifdef CONFIG_ARC_HAS_DCCM
@@ -417,35 +394,17 @@ static void arc_chk_core_config(void)
         * DCCM can be arbit placed in hardware.
         * Make sure it's placement/sz matches what Linux is built with
         */
-       if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr)
+       if ((unsigned int)__arc_dccm_base != info->dccm.base)
                panic("Linux built with incorrect DCCM Base address\n");
 
-       if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz)
+       if (CONFIG_ARC_DCCM_SZ * SZ_1K != info->dccm.sz)
                panic("Linux built with incorrect DCCM Size\n");
 #endif
 
 #ifdef CONFIG_ARC_HAS_ICCM
-       if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz)
+       if (CONFIG_ARC_ICCM_SZ * SZ_1K != info->iccm.sz)
                panic("Linux built with incorrect ICCM Size\n");
 #endif
-
-       /*
-        * FP hardware/software config sanity
-        * -If hardware present, kernel needs to save/restore FPU state
-        * -If not, it will crash trying to save/restore the non-existant regs
-        */
-
-       if (is_isa_arcompact()) {
-               /* only DPDP checked since SP has no arch visible regs */
-               present = cpu->extn.fpu_dp;
-               CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
-       } else {
-               /* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */
-               present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp;
-               CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
-
-               dsp_config_check();
-       }
 }
 
 /*
@@ -456,21 +415,19 @@ static void arc_chk_core_config(void)
 
 void setup_processor(void)
 {
+       struct cpuinfo_arc info;
+       int c = smp_processor_id();
        char str[512];
-       int cpu_id = smp_processor_id();
 
-       read_arc_build_cfg_regs();
-       arc_init_IRQ();
+       pr_info("%s", arc_cpu_mumbojumbo(c, &info, str, sizeof(str)));
+       pr_info("%s", arc_platform_smp_cpuinfo());
 
-       pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
+       arc_chk_core_config(&info);
 
+       arc_init_IRQ();
        arc_mmu_init();
        arc_cache_init();
 
-       pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
-       pr_info("%s", arc_platform_smp_cpuinfo());
-
-       arc_chk_core_config();
 }
 
 static inline bool uboot_arg_invalid(unsigned long addr)
@@ -617,6 +574,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        char *str;
        int cpu_id = ptr_to_cpu(v);
        struct device *cpu_dev = get_cpu_device(cpu_id);
+       struct cpuinfo_arc info;
        struct clk *cpu_clk;
        unsigned long freq = 0;
 
@@ -629,7 +587,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        if (!str)
                goto done;
 
-       seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
+       seq_printf(m, arc_cpu_mumbojumbo(cpu_id, &info, str, PAGE_SIZE));
 
        cpu_clk = clk_get(cpu_dev, NULL);
        if (IS_ERR(cpu_clk)) {
@@ -646,9 +604,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                   loops_per_jiffy / (500000 / HZ),
                   (loops_per_jiffy / (5000 / HZ)) % 100);
 
-       seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE));
-       seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE));
-       seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE));
        seq_printf(m, arc_platform_smp_cpuinfo());
 
        free_page((unsigned long)str);
index 3c1590c..0b3bb52 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/sched/task_stack.h>
 
 #include <asm/ucontext.h>
+#include <asm/entry.h>
 
 struct rt_sigframe {
        struct siginfo info;
index 409cfa4..8d9b188 100644 (file)
 #include <linux/export.h>
 #include <linux/of_fdt.h>
 
-#include <asm/processor.h>
-#include <asm/setup.h>
 #include <asm/mach_desc.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
 
 #ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -351,7 +352,7 @@ static inline int __do_IPI(unsigned long msg)
  * arch-common ISR to handle for inter-processor interrupts
  * Has hooks for platform specific IPI
  */
-irqreturn_t do_IPI(int irq, void *dev_id)
+static irqreturn_t do_IPI(int irq, void *dev_id)
 {
        unsigned long pending;
        unsigned long __maybe_unused copy;
index 5372dc0..ea99c06 100644 (file)
@@ -29,6 +29,7 @@
 
 #include <asm/arcregs.h>
 #include <asm/unwind.h>
+#include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 
 /*-------------------------------------------------------------------------
index 6b83e3f..9b9570b 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/ptrace.h>
 #include <linux/kprobes.h>
 #include <linux/kgdb.h>
+#include <asm/entry.h>
 #include <asm/setup.h>
 #include <asm/unaligned.h>
 #include <asm/kprobes.h>
@@ -109,9 +110,7 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
  */
 void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 {
-       unsigned int param = regs->ecr_param;
-
-       switch (param) {
+       switch (regs->ecr.param) {
        case 1:
                trap_is_brkpt(address, regs);
                break;
index 7654c2e..d5b3ed2 100644 (file)
@@ -115,8 +115,8 @@ static void show_ecr_verbose(struct pt_regs *regs)
        /* For Data fault, this is data address not instruction addr */
        address = current->thread.fault_address;
 
-       vec = regs->ecr_vec;
-       cause_code = regs->ecr_cause;
+       vec = regs->ecr.vec;
+       cause_code = regs->ecr.cause;
 
        /* For DTLB Miss or ProtV, display the memory involved too */
        if (vec == ECR_V_DTLB_MISS) {
@@ -154,7 +154,7 @@ static void show_ecr_verbose(struct pt_regs *regs)
                pr_cont("Misaligned r/w from 0x%08lx\n", address);
 #endif
        } else if (vec == ECR_V_TRAP) {
-               if (regs->ecr_param == 5)
+               if (regs->ecr.param == 5)
                        pr_cont("gcc generated __builtin_trap\n");
        } else {
                pr_cont("Check Programmer's Manual\n");
@@ -184,9 +184,10 @@ void show_regs(struct pt_regs *regs)
        if (user_mode(regs))
                show_faulting_vma(regs->ret); /* faulting code, not data */
 
-       pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\nSTAT: 0x%08lx",
-               regs->event, current->thread.fault_address, regs->ret,
-               regs->status32);
+       pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n",
+               regs->ecr.full, current->thread.fault_address, regs->ret);
+
+       pr_info("STAT32: 0x%08lx", regs->status32);
 
 #define STS_BIT(r, bit)        r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
 
index d2e09fe..d0a5cec 100644 (file)
 #endif
 
 ENTRY_CFI(memset)
-       PREFETCHW_INSTR r0, 0   ; Prefetch the first write location
        mov.f   0, r2
 ;;; if size is zero
        jz.d    [blink]
        mov     r3, r0          ; don't clobber ret val
 
+       PREFETCHW_INSTR r0, 0   ; Prefetch the first write location
+
 ;;; if length < 8
        brls.d.nt       r2, 8, .Lsmallchunk
        mov.f   lp_count,r2
index 3c16ee9..f7e05c1 100644 (file)
@@ -28,6 +28,10 @@ int slc_enable = 1, ioc_enable = 1;
 unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
 unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
 
+static struct cpuinfo_arc_cache {
+       unsigned int sz_k, line_len, colors;
+} ic_info, dc_info, slc_info;
+
 void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
                               unsigned long sz, const int op, const int full_page);
 
@@ -35,78 +39,24 @@ void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
 void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
 void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz);
 
-char *arc_cache_mumbojumbo(int c, char *buf, int len)
-{
-       int n = 0;
-       struct cpuinfo_arc_cache *p;
-
-#define PR_CACHE(p, cfg, str)                                          \
-       if (!(p)->line_len)                                             \
-               n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");     \
-       else                                                            \
-               n += scnprintf(buf + n, len - n,                        \
-                       str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",  \
-                       (p)->sz_k, (p)->assoc, (p)->line_len,           \
-                       (p)->vipt ? "VIPT" : "PIPT",                    \
-                       (p)->alias ? " aliasing" : "",                  \
-                       IS_USED_CFG(cfg));
-
-       PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
-       PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
-
-       p = &cpuinfo_arc700[c].slc;
-       if (p->line_len)
-               n += scnprintf(buf + n, len - n,
-                              "SLC\t\t: %uK, %uB Line%s\n",
-                              p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
-
-       n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
-                      perip_base,
-                      IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
-
-       return buf;
-}
-
-/*
- * Read the Cache Build Confuration Registers, Decode them and save into
- * the cpuinfo structure for later use.
- * No Validation done here, simply read/convert the BCRs
- */
-static void read_decode_cache_bcr_arcv2(int cpu)
+static int read_decode_cache_bcr_arcv2(int c, char *buf, int len)
 {
-       struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
+       struct cpuinfo_arc_cache *p_slc = &slc_info;
+       struct bcr_identity ident;
        struct bcr_generic sbcr;
-
-       struct bcr_slc_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int pad:24, way:2, lsz:2, sz:4;
-#else
-               unsigned int sz:4, lsz:2, way:2, pad:24;
-#endif
-       } slc_cfg;
-
-       struct bcr_clust_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
-#else
-               unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
-#endif
-       } cbcr;
-
-       struct bcr_volatile {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int start:4, limit:4, pad:22, order:1, disable:1;
-#else
-               unsigned int disable:1, order:1, pad:22, limit:4, start:4;
-#endif
-       } vol;
-
+       struct bcr_clust_cfg cbcr;
+       struct bcr_volatile vol;
+       int n = 0;
 
        READ_BCR(ARC_REG_SLC_BCR, sbcr);
        if (sbcr.ver) {
+               struct bcr_slc_cfg  slc_cfg;
                READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
                p_slc->sz_k = 128 << slc_cfg.sz;
                l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+               n += scnprintf(buf + n, len - n,
+                              "SLC\t\t: %uK, %uB Line%s\n",
+                              p_slc->sz_k, p_slc->line_len, IS_USED_RUN(slc_enable));
        }
 
        READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
@@ -129,70 +79,83 @@ static void read_decode_cache_bcr_arcv2(int cpu)
                ioc_enable = 0;
        }
 
+       READ_BCR(AUX_IDENTITY, ident);
+
        /* HS 2.0 didn't have AUX_VOL */
-       if (cpuinfo_arc700[cpu].core.family > 0x51) {
+       if (ident.family > 0x51) {
                READ_BCR(AUX_VOL, vol);
                perip_base = vol.start << 28;
                /* HS 3.0 has limit and strict-ordering fields */
-               if (cpuinfo_arc700[cpu].core.family > 0x52)
+               if (ident.family > 0x52)
                        perip_end = (vol.limit << 28) - 1;
        }
+
+       n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
+                      perip_base,
+                      IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
+
+       return n;
 }
 
-void read_decode_cache_bcr(void)
+int arc_cache_mumbojumbo(int c, char *buf, int len)
 {
-       struct cpuinfo_arc_cache *p_ic, *p_dc;
-       unsigned int cpu = smp_processor_id();
-       struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
-               unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
-       } ibcr, dbcr;
+       struct cpuinfo_arc_cache *p_ic = &ic_info, *p_dc = &dc_info;
+       struct bcr_cache ibcr, dbcr;
+       int vipt, assoc;
+       int n = 0;
 
-       p_ic = &cpuinfo_arc700[cpu].icache;
        READ_BCR(ARC_REG_IC_BCR, ibcr);
-
        if (!ibcr.ver)
                goto dc_chk;
 
-       if (ibcr.ver <= 3) {
+       if (is_isa_arcompact() && (ibcr.ver <= 3)) {
                BUG_ON(ibcr.config != 3);
-               p_ic->assoc = 2;                /* Fixed to 2w set assoc */
-       } else if (ibcr.ver >= 4) {
-               p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
+               assoc = 2;              /* Fixed to 2w set assoc */
+       } else if (is_isa_arcv2() && (ibcr.ver >= 4)) {
+               assoc = 1 << ibcr.config;       /* 1,2,4,8 */
        }
 
        p_ic->line_len = 8 << ibcr.line_len;
        p_ic->sz_k = 1 << (ibcr.sz - 1);
-       p_ic->vipt = 1;
-       p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
+       p_ic->colors = p_ic->sz_k/assoc/TO_KB(PAGE_SIZE);
+
+       n += scnprintf(buf + n, len - n,
+                       "I-Cache\t\t: %uK, %dway/set, %uB Line, VIPT%s%s\n",
+                       p_ic->sz_k, assoc, p_ic->line_len,
+                       p_ic->colors > 1 ? " aliasing" : "",
+                       IS_USED_CFG(CONFIG_ARC_HAS_ICACHE));
 
 dc_chk:
-       p_dc = &cpuinfo_arc700[cpu].dcache;
        READ_BCR(ARC_REG_DC_BCR, dbcr);
-
        if (!dbcr.ver)
                goto slc_chk;
 
-       if (dbcr.ver <= 3) {
+       if (is_isa_arcompact() && (dbcr.ver <= 3)) {
                BUG_ON(dbcr.config != 2);
-               p_dc->assoc = 4;                /* Fixed to 4w set assoc */
-               p_dc->vipt = 1;
-               p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
-       } else if (dbcr.ver >= 4) {
-               p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
-               p_dc->vipt = 0;
-               p_dc->alias = 0;                /* PIPT so can't VIPT alias */
+               vipt = 1;
+               assoc = 4;              /* Fixed to 4w set assoc */
+               p_dc->colors = p_dc->sz_k/assoc/TO_KB(PAGE_SIZE);
+       } else if (is_isa_arcv2() && (dbcr.ver >= 4)) {
+               vipt = 0;
+               assoc = 1 << dbcr.config;       /* 1,2,4,8 */
+               p_dc->colors = 1;               /* PIPT so can't VIPT alias */
        }
 
        p_dc->line_len = 16 << dbcr.line_len;
        p_dc->sz_k = 1 << (dbcr.sz - 1);
 
+       n += scnprintf(buf + n, len - n,
+                       "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",
+                       p_dc->sz_k, assoc, p_dc->line_len,
+                       vipt ? "VIPT" : "PIPT",
+                       p_dc->colors > 1 ? " aliasing" : "",
+                       IS_USED_CFG(CONFIG_ARC_HAS_DCACHE));
+
 slc_chk:
        if (is_isa_arcv2())
-                read_decode_cache_bcr_arcv2(cpu);
+               n += read_decode_cache_bcr_arcv2(c, buf + n, len - n);
+
+       return n;
 }
 
 /*
@@ -581,7 +544,7 @@ static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
 
 #endif /* CONFIG_ARC_HAS_ICACHE */
 
-noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
+static noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
        /*
@@ -644,7 +607,7 @@ noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
 #endif
 }
 
-noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
+static __maybe_unused noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
 {
 #ifdef CONFIG_ISA_ARCV2
        /*
@@ -1082,7 +1045,7 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
  * 3. All Caches need to be disabled when setting up IOC to elide any in-flight
  *    Coherency transactions
  */
-noinline void __init arc_ioc_setup(void)
+static noinline void __init arc_ioc_setup(void)
 {
        unsigned int ioc_base, mem_sz;
 
@@ -1144,12 +1107,10 @@ noinline void __init arc_ioc_setup(void)
  *    one core suffices for all
  *  - IOC setup / dma callbacks only need to be done once
  */
-void __init arc_cache_init_master(void)
+static noinline void __init arc_cache_init_master(void)
 {
-       unsigned int __maybe_unused cpu = smp_processor_id();
-
        if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
-               struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+               struct cpuinfo_arc_cache *ic = &ic_info;
 
                if (!ic->line_len)
                        panic("cache support enabled but non-existent cache\n");
@@ -1162,14 +1123,14 @@ void __init arc_cache_init_master(void)
                 * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
                 * pair to provide vaddr/paddr respectively, just as in MMU v3
                 */
-               if (is_isa_arcv2() && ic->alias)
+               if (is_isa_arcv2() && ic->colors > 1)
                        _cache_line_loop_ic_fn = __cache_line_loop_v3;
                else
                        _cache_line_loop_ic_fn = __cache_line_loop;
        }
 
        if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
-               struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+               struct cpuinfo_arc_cache *dc = &dc_info;
 
                if (!dc->line_len)
                        panic("cache support enabled but non-existent cache\n");
@@ -1181,14 +1142,13 @@ void __init arc_cache_init_master(void)
                /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
                if (is_isa_arcompact()) {
                        int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
-                       int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE);
 
-                       if (dc->alias) {
+                       if (dc->colors > 1) {
                                if (!handled)
                                        panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
-                               if (CACHE_COLORS_NUM != num_colors)
+                               if (CACHE_COLORS_NUM != dc->colors)
                                        panic("CACHE_COLORS_NUM not optimized for config\n");
-                       } else if (!dc->alias && handled) {
+                       } else if (handled && dc->colors == 1) {
                                panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
                        }
                }
@@ -1231,9 +1191,6 @@ void __init arc_cache_init_master(void)
 void __ref arc_cache_init(void)
 {
        unsigned int __maybe_unused cpu = smp_processor_id();
-       char str[256];
-
-       pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
 
        if (!cpu)
                arc_cache_init_master();
index 4e14c42..88fa3a4 100644 (file)
@@ -22,14 +22,3 @@ int fixup_exception(struct pt_regs *regs)
 
        return 0;
 }
-
-#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
-unsigned long arc_clear_user_noinline(void __user *to,
-               unsigned long n)
-{
-       return __arc_clear_user(to, n);
-}
-EXPORT_SYMBOL(arc_clear_user_noinline);
-
-#endif
index f59e722..95119a5 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
 #include <linux/mm_types.h>
+#include <asm/entry.h>
 #include <asm/mmu.h>
 
 /*
@@ -99,10 +100,10 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
        if (faulthandler_disabled() || !mm)
                goto no_context;
 
-       if (regs->ecr_cause & ECR_C_PROTV_STORE)        /* ST/EX */
+       if (regs->ecr.cause & ECR_C_PROTV_STORE)        /* ST/EX */
                write = 1;
-       else if ((regs->ecr_vec == ECR_V_PROTV) &&
-                (regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
+       else if ((regs->ecr.vec == ECR_V_PROTV) &&
+                (regs->ecr.cause == ECR_C_PROTV_INST_FETCH))
                exec = 1;
 
        flags = FAULT_FLAG_DEFAULT;
index 9f64d72..6a71b23 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/highmem.h>
 #include <asm/page.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/arcregs.h>
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
index 6f40f37..e536b2d 100644 (file)
@@ -18,7 +18,9 @@
 /* A copy of the ASID from the PID reg is kept in asid_cache */
 DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
-static int __read_mostly pae_exists;
+static struct cpuinfo_arc_mmu {
+       unsigned int ver, pg_sz_k, s_pg_sz_m, pae, sets, ways;
+} mmuinfo;
 
 /*
  * Utility Routine to erase a J-TLB entry
@@ -131,7 +133,7 @@ static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
 
 noinline void local_flush_tlb_all(void)
 {
-       struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+       struct cpuinfo_arc_mmu *mmu = &mmuinfo;
        unsigned long flags;
        unsigned int entry;
        int num_tlb = mmu->sets * mmu->ways;
@@ -389,7 +391,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 /*
  * Routine to create a TLB entry
  */
-void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
+static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 {
        unsigned long flags;
        unsigned int asid_or_sasid, rwx;
@@ -564,89 +566,64 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
  */
-void read_decode_mmu_bcr(void)
+int arc_mmu_mumbojumbo(int c, char *buf, int len)
 {
-       struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
-       unsigned int tmp;
-       struct bcr_mmu_3 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
-                    u_itlb:4, u_dtlb:4;
-#else
-       unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
-                    ways:4, ver:8;
-#endif
-       } *mmu3;
-
-       struct bcr_mmu_4 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-       unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
-                    n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
-#else
-       /*           DTLB      ITLB      JES        JE         JA      */
-       unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
-                    pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
-#endif
-       } *mmu4;
+       struct cpuinfo_arc_mmu *mmu = &mmuinfo;
+       unsigned int bcr, u_dtlb, u_itlb, sasid;
+       struct bcr_mmu_3 *mmu3;
+       struct bcr_mmu_4 *mmu4;
+       char super_pg[64] = "";
+       int n = 0;
 
-       tmp = read_aux_reg(ARC_REG_MMU_BCR);
-       mmu->ver = (tmp >> 24);
+       bcr = read_aux_reg(ARC_REG_MMU_BCR);
+       mmu->ver = (bcr >> 24);
 
        if (is_isa_arcompact() && mmu->ver == 3) {
-               mmu3 = (struct bcr_mmu_3 *)&tmp;
+               mmu3 = (struct bcr_mmu_3 *)&bcr;
                mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
                mmu->sets = 1 << mmu3->sets;
                mmu->ways = 1 << mmu3->ways;
-               mmu->u_dtlb = mmu3->u_dtlb;
-               mmu->u_itlb = mmu3->u_itlb;
-               mmu->sasid = mmu3->sasid;
+               u_dtlb = mmu3->u_dtlb;
+               u_itlb = mmu3->u_itlb;
+               sasid = mmu3->sasid;
        } else {
-               mmu4 = (struct bcr_mmu_4 *)&tmp;
+               mmu4 = (struct bcr_mmu_4 *)&bcr;
                mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
                mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
                mmu->sets = 64 << mmu4->n_entry;
                mmu->ways = mmu4->n_ways * 2;
-               mmu->u_dtlb = mmu4->u_dtlb * 4;
-               mmu->u_itlb = mmu4->u_itlb * 4;
-               mmu->sasid = mmu4->sasid;
-               pae_exists = mmu->pae = mmu4->pae;
+               u_dtlb = mmu4->u_dtlb * 4;
+               u_itlb = mmu4->u_itlb * 4;
+               sasid = mmu4->sasid;
+               mmu->pae = mmu4->pae;
        }
-}
 
-char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
-{
-       int n = 0;
-       struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
-       char super_pg[64] = "";
-
-       if (p_mmu->s_pg_sz_m)
-               scnprintf(super_pg, 64, "%dM Super Page %s",
-                         p_mmu->s_pg_sz_m,
-                         IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
+       if (mmu->s_pg_sz_m)
+               scnprintf(super_pg, 64, "/%dM%s",
+                         mmu->s_pg_sz_m,
+                         IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? " (THP enabled)":"");
 
        n += scnprintf(buf + n, len - n,
-                     "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
-                      p_mmu->ver, p_mmu->pg_sz_k, super_pg,  CONFIG_PGTABLE_LEVELS,
-                      p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
-                      p_mmu->u_dtlb, p_mmu->u_itlb,
-                      IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
-
-       return buf;
+                     "MMU [v%x]\t: %dk%s, swalk %d lvl, JTLB %dx%d, uDTLB %d, uITLB %d%s%s%s\n",
+                      mmu->ver, mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
+                      mmu->sets, mmu->ways,
+                      u_dtlb, u_itlb,
+                      IS_AVAIL1(sasid, ", SASID"),
+                      IS_AVAIL2(mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
+
+       return n;
 }
 
 int pae40_exist_but_not_enab(void)
 {
-       return pae_exists && !is_pae40_enabled();
+       return mmuinfo.pae && !is_pae40_enabled();
 }
 
 void arc_mmu_init(void)
 {
-       struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
-       char str[256];
+       struct cpuinfo_arc_mmu *mmu = &mmuinfo;
        int compat = 0;
 
-       pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
-
        /*
         * Can't be done in processor.h due to header include dependencies
         */
@@ -723,7 +700,7 @@ volatile int dup_pd_silent; /* Be silent abt it or complain (default) */
 void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
                          struct pt_regs *regs)
 {
-       struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+       struct cpuinfo_arc_mmu *mmu = &mmuinfo;
        unsigned long flags;
        int set, n_ways = mmu->ways;
 
index b821df7..1feb990 100644 (file)
@@ -6,7 +6,6 @@
  */
 
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 #include <linux/libfdt.h>
 
 #include <asm/asm-offsets.h>
index db96dcb..8803a0f 100644 (file)
@@ -1 +1,2 @@
+# Help: DRAM base at 0x00000000
 CONFIG_DRAM_BASE=0x00000000
index 343d533..aab8f86 100644 (file)
@@ -1 +1,2 @@
+# Help: DRAM base at 0xc0000000
 CONFIG_DRAM_BASE=0xc0000000
index 61ba704..4aabce4 100644 (file)
@@ -1 +1,2 @@
+# Help: DRAM base at 0xd0000000
 CONFIG_DRAM_BASE=0xd0000000
index a6d6f7a..1ab94da 100644 (file)
@@ -1,2 +1,3 @@
+# Help: Enable Large Physical Address Extension mode
 CONFIG_ARM_LPAE=y
 CONFIG_VMSPLIT_2G=y
index f3cd04f..72529f5 100644 (file)
@@ -227,6 +227,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
        return false;
 }
 
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
+
 /* PMU Version in DFR Register */
 #define ARMV8_PMU_DFR_VER_NI        0
 #define ARMV8_PMU_DFR_VER_V3P4      0x5
diff --git a/arch/arm/include/asm/ide.h b/arch/arm/include/asm/ide.h
deleted file mode 100644 (file)
index a81e0b0..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  arch/arm/include/asm/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the ARM architecture specific IDE code.
- */
-
-#ifndef __ASMARM_IDE_H
-#define __ASMARM_IDE_H
-
-#ifdef __KERNEL__
-
-#define __ide_mm_insw(port,addr,len)   readsw(port,addr,len)
-#define __ide_mm_insl(port,addr,len)   readsl(port,addr,len)
-#define __ide_mm_outsw(port,addr,len)  writesw(port,addr,len)
-#define __ide_mm_outsl(port,addr,len)  writesl(port,addr,len)
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMARM_IDE_H */
index 6865d54..c47c36f 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Virtualization guest
 #
 # Base options for platforms
 #
index f482b99..bcd5622 100644 (file)
@@ -156,4 +156,6 @@ static inline void efi_capsule_flush_cache_range(void *addr, int size)
 
 efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f);
 
+void efi_icache_sync(unsigned long start, unsigned long end);
+
 #endif /* _ASM_EFI_H */
index 58e5eb2..5882b24 100644 (file)
 #define HCR_DCT                (UL(1) << 57)
 #define HCR_ATA_SHIFT  56
 #define HCR_ATA                (UL(1) << HCR_ATA_SHIFT)
+#define HCR_TTLBOS     (UL(1) << 55)
+#define HCR_TTLBIS     (UL(1) << 54)
+#define HCR_ENSCXT     (UL(1) << 53)
+#define HCR_TOCU       (UL(1) << 52)
 #define HCR_AMVOFFEN   (UL(1) << 51)
+#define HCR_TICAB      (UL(1) << 50)
 #define HCR_TID4       (UL(1) << 49)
 #define HCR_FIEN       (UL(1) << 47)
 #define HCR_FWB                (UL(1) << 46)
+#define HCR_NV2                (UL(1) << 45)
+#define HCR_AT         (UL(1) << 44)
+#define HCR_NV1                (UL(1) << 43)
+#define HCR_NV         (UL(1) << 42)
 #define HCR_API                (UL(1) << 41)
 #define HCR_APK                (UL(1) << 40)
 #define HCR_TEA                (UL(1) << 37)
@@ -89,7 +98,6 @@
                         HCR_BSU_IS | HCR_FB | HCR_TACR | \
                         HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
                         HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
-#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
 #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
                                 BIT(18) |              \
                                 GENMASK(16, 15))
 
+/*
+ * FGT register definitions
+ *
+ * RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
+ * We're not using the generated masks as they are usually ahead of
+ * the published ARM ARM, which we use as a reference.
+ *
+ * Once we get to a point where the two describe the same thing, we'll
+ * merge the definitions. One day.
+ */
+#define __HFGRTR_EL2_RES0      (GENMASK(63, 56) | GENMASK(53, 51))
+#define __HFGRTR_EL2_MASK      GENMASK(49, 0)
+#define __HFGRTR_EL2_nMASK     (GENMASK(55, 54) | BIT(50))
+
+#define __HFGWTR_EL2_RES0      (GENMASK(63, 56) | GENMASK(53, 51) |    \
+                                BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
+                                GENMASK(26, 25) | BIT(21) | BIT(18) |  \
+                                GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
+#define __HFGWTR_EL2_MASK      GENMASK(49, 0)
+#define __HFGWTR_EL2_nMASK     (GENMASK(55, 54) | BIT(50))
+
+#define __HFGITR_EL2_RES0      GENMASK(63, 57)
+#define __HFGITR_EL2_MASK      GENMASK(54, 0)
+#define __HFGITR_EL2_nMASK     GENMASK(56, 55)
+
+#define __HDFGRTR_EL2_RES0     (BIT(49) | BIT(42) | GENMASK(39, 38) |  \
+                                GENMASK(21, 20) | BIT(8))
+#define __HDFGRTR_EL2_MASK     ~__HDFGRTR_EL2_nMASK
+#define __HDFGRTR_EL2_nMASK    GENMASK(62, 59)
+
+#define __HDFGWTR_EL2_RES0     (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
+                                BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
+                                BIT(22) | BIT(9) | BIT(6))
+#define __HDFGWTR_EL2_MASK     ~__HDFGWTR_EL2_nMASK
+#define __HDFGWTR_EL2_nMASK    GENMASK(62, 60)
+
+/* Similar definitions for HCRX_EL2 */
+#define __HCRX_EL2_RES0                (GENMASK(63, 16) | GENMASK(13, 12))
+#define __HCRX_EL2_MASK                (0)
+#define __HCRX_EL2_nMASK       (GENMASK(15, 14) | GENMASK(4, 0))
+
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK     (~UL(0xf))
 /*
index 24e28bb..24b5e6b 100644 (file)
@@ -70,6 +70,7 @@ enum __kvm_host_smccc_func {
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
        __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+       __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
        __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
        __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
        __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
@@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
                                         phys_addr_t ipa,
                                         int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                                       phys_addr_t start, unsigned long pages);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
index d3dd05b..af06ccb 100644 (file)
@@ -49,6 +49,7 @@
 #define KVM_REQ_RELOAD_GICv4   KVM_ARCH_REQ(4)
 #define KVM_REQ_RELOAD_PMU     KVM_ARCH_REQ(5)
 #define KVM_REQ_SUSPEND                KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
                                     KVM_DIRTY_LOG_INITIALLY_SET)
@@ -380,6 +381,7 @@ enum vcpu_sysreg {
        CPTR_EL2,       /* Architectural Feature Trap Register (EL2) */
        HSTR_EL2,       /* Hypervisor System Trap Register */
        HACR_EL2,       /* Hypervisor Auxiliary Control Register */
+       HCRX_EL2,       /* Extended Hypervisor Configuration Register */
        TTBR0_EL2,      /* Translation Table Base Register 0 (EL2) */
        TTBR1_EL2,      /* Translation Table Base Register 1 (EL2) */
        TCR_EL2,        /* Translation Control Register (EL2) */
@@ -400,6 +402,11 @@ enum vcpu_sysreg {
        TPIDR_EL2,      /* EL2 Software Thread ID Register */
        CNTHCTL_EL2,    /* Counter-timer Hypervisor Control register */
        SP_EL2,         /* EL2 Stack Pointer */
+       HFGRTR_EL2,
+       HFGWTR_EL2,
+       HFGITR_EL2,
+       HDFGRTR_EL2,
+       HDFGWTR_EL2,
        CNTHP_CTL_EL2,
        CNTHP_CVAL_EL2,
        CNTHV_CTL_EL2,
@@ -567,8 +574,7 @@ struct kvm_vcpu_arch {
        /* Cache some mmu pages needed inside spinlock regions */
        struct kvm_mmu_memory_cache mmu_page_cache;
 
-       /* Target CPU and feature flags */
-       int target;
+       /* feature flags */
        DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
        /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
@@ -669,6 +675,8 @@ struct kvm_vcpu_arch {
 #define VCPU_SVE_FINALIZED     __vcpu_single_flag(cflags, BIT(1))
 /* PTRAUTH exposed to guest */
 #define GUEST_HAS_PTRAUTH      __vcpu_single_flag(cflags, BIT(2))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED       __vcpu_single_flag(cflags, BIT(3))
 
 /* Exception pending */
 #define PENDING_EXCEPTION      __vcpu_single_flag(iflags, BIT(0))
@@ -899,7 +907,6 @@ struct kvm_vcpu_stat {
        u64 exits;
 };
 
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
@@ -967,8 +974,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
 #define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
 #endif /* __KVM_NVHE_HYPERVISOR__ */
 
-void force_vm_exit(const cpumask_t *mask);
-
 int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
 
@@ -983,6 +988,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
 void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
 
 int __init kvm_sys_reg_table_init(void);
+int __init populate_nv_trap_config(void);
 
 bool lock_all_vcpus(struct kvm *kvm);
 void unlock_all_vcpus(struct kvm *kvm);
@@ -1049,8 +1055,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
        return cpus_have_const_cap(ARM64_SPECTRE_V3A);
 }
 
-void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
-
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
@@ -1113,13 +1117,15 @@ int __init kvm_set_ipa_limit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 static inline bool kvm_vm_is_protected(struct kvm *kvm)
 {
        return false;
 }
 
-void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
-
 int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
 bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 
index 0e1e1ab..96a80e8 100644 (file)
@@ -168,6 +168,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
                           void __iomem **haddr);
 int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
                             void **haddr);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
 void __init free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
index 8fb67f0..fa23cc9 100644 (file)
@@ -11,6 +11,8 @@ static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
                test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
 }
 
+extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
+
 struct sys_reg_params;
 struct sys_reg_desc;
 
index 929d355..d3e354b 100644 (file)
@@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
  *        kvm_pgtable_prot format.
  */
 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu:       Stage-2 KVM MMU struct
+ * @addr:      The base Intermediate physical address from which to invalidate
+ * @size:      Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t addr, size_t size);
 #endif /* __ARM64_KVM_PGTABLE_H__ */
index 16464bf..3829657 100644 (file)
 #define SYS_DC_CIGSW                   sys_insn(1, 0, 7, 14, 4)
 #define SYS_DC_CIGDSW                  sys_insn(1, 0, 7, 14, 6)
 
+#define SYS_IC_IALLUIS                 sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU                   sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU                    sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC                    sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC                   sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC                  sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC                    sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC                   sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC                  sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU                    sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP                    sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP                   sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP                  sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP                   sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP                  sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP                 sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC                   sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC                  sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC                 sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA                     sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA                     sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA                    sys_insn(1, 3, 7, 4, 4)
+
 /*
  * Automatically generated definitions for system registers, the
  * manual encodings below are in the process of being converted to
 #define SYS_DBGDTRTX_EL0               sys_reg(2, 3, 0, 5, 0)
 #define SYS_DBGVCR32_EL2               sys_reg(2, 4, 0, 7, 0)
 
+/*
+ * BRB* register encodings. The indexed variants derive encoding fields
+ * from 'n'; the argument is parenthesized so that expression arguments
+ * (e.g. 'a | b') expand with the intended precedence.
+ */
+#define SYS_BRBINF_EL1(n)              sys_reg(2, 1, 8, ((n) & 15), ((((n) & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1              sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n)              sys_reg(2, 1, 8, ((n) & 15), ((((n) & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1              sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n)              sys_reg(2, 1, 8, ((n) & 15), ((((n) & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1              sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1                  sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1                  sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1                 sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1                        sys_reg(2, 1, 9, 2, 0)
+
+/*
+ * TRC* register encodings. The indexed variants derive encoding fields
+ * from 'm'; the argument is parenthesized so that expression arguments
+ * (e.g. 'a | b') expand with the intended precedence.
+ */
+#define SYS_TRCITECR_EL1               sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m)                        sys_reg(2, 1, 2, (((m) & 7) << 1), (2 | ((m) >> 3)))
+#define SYS_TRCACVR(m)                 sys_reg(2, 1, 2, (((m) & 7) << 1), (0 | ((m) >> 3)))
+#define SYS_TRCAUTHSTATUS              sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR                 sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR                  sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR                  sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0               sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1               sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m)               sys_reg(2, 1, 3, (((m) & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR                        sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET                        sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m)              sys_reg(2, 1, 0, (4 | ((m) & 3)), 5)
+#define SYS_TRCCNTRLDVR(m)             sys_reg(2, 1, 0, (0 | ((m) & 3)), 5)
+#define SYS_TRCCNTVR(m)                        sys_reg(2, 1, 0, (8 | ((m) & 3)), 5)
+#define SYS_TRCCONFIGR                 sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH                 sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID                   sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R              sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R              sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m)            sys_reg(2, 1, 0, (8 | ((m) & 3)), 4)
+#define SYS_TRCIDR0                    sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10                   sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11                   sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12                   sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13                   sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1                    sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2                    sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3                    sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4                    sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5                    sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6                    sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7                    sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8                    sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9                    sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m)               sys_reg(2, 1, 0, ((m) & 7), 7)
+#define SYS_TRCITEEDCR                 sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR                   sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR                 sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR                   sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m)               sys_reg(2, 1, 1, ((m) & 15), (0 | ((m) >> 4)))
+#define SYS_TRCRSR                     sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m)               sys_reg(2, 1, 0, ((m) & 3), 4)
+#define SYS_TRCSEQRSTEVR               sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR                  sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m)                        sys_reg(2, 1, 1, ((m) & 7), 2)
+#define SYS_TRCSSCSR(m)                        sys_reg(2, 1, 1, (8 | ((m) & 7)), 2)
+#define SYS_TRCSSPCICR(m)              sys_reg(2, 1, 1, ((m) & 7), 3)
+#define SYS_TRCSTALLCTLR               sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR                   sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR                  sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR                        sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR                  sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR                  sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR                        sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR              sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR                        sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0              sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1              sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m)              sys_reg(2, 1, 3, (((m) & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR                   sys_reg(2, 1, 1, 0, 4)
+
 #define SYS_MIDR_EL1                   sys_reg(3, 0, 0, 0, 0)
 #define SYS_MPIDR_EL1                  sys_reg(3, 0, 0, 0, 5)
 #define SYS_REVIDR_EL1                 sys_reg(3, 0, 0, 0, 6)
 #define SYS_ERXCTLR_EL1                        sys_reg(3, 0, 5, 4, 1)
 #define SYS_ERXSTATUS_EL1              sys_reg(3, 0, 5, 4, 2)
 #define SYS_ERXADDR_EL1                        sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1                        sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1              sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1              sys_reg(3, 0, 5, 4, 6)
 #define SYS_ERXMISC0_EL1               sys_reg(3, 0, 5, 5, 0)
 #define SYS_ERXMISC1_EL1               sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1               sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1               sys_reg(3, 0, 5, 5, 3)
 #define SYS_TFSR_EL1                   sys_reg(3, 0, 5, 6, 0)
 #define SYS_TFSRE0_EL1                 sys_reg(3, 0, 5, 6, 1)
 
 #define SYS_ICC_IGRPEN0_EL1            sys_reg(3, 0, 12, 12, 6)
 #define SYS_ICC_IGRPEN1_EL1            sys_reg(3, 0, 12, 12, 7)
 
+#define SYS_ACCDATA_EL1                        sys_reg(3, 0, 13, 0, 5)
+
 #define SYS_CNTKCTL_EL1                        sys_reg(3, 0, 14, 1, 0)
 
 #define SYS_AIDR_EL1                   sys_reg(3, 1, 0, 0, 7)
 #define SYS_VTCR_EL2                   sys_reg(3, 4, 2, 1, 2)
 
 #define SYS_TRFCR_EL2                  sys_reg(3, 4, 1, 2, 1)
-#define SYS_HDFGRTR_EL2                        sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2                        sys_reg(3, 4, 3, 1, 5)
 #define SYS_HAFGRTR_EL2                        sys_reg(3, 4, 3, 1, 6)
 #define SYS_SPSR_EL2                   sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2                    sys_reg(3, 4, 4, 0, 1)
 
 #define SYS_SP_EL2                     sys_reg(3, 6,  4, 1, 0)
 
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R    sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W    sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R    sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W    sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP   sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP   sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E2R    sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W    sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R   sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W   sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R   sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W   sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+
+/* TLBI instructions */
+#define OP_TLBI_VMALLE1OS              sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS                 sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS               sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS                        sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS                        sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS               sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS                        sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS               sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS               sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS              sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS              sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS                 sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS               sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS                        sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS                        sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS               sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS                        sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS               sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS               sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS              sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1                  sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1                 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1                 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1                        sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1                        sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1                   sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1                 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1                  sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1                  sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1                 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS           sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS              sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS            sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS             sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS             sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS            sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS             sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS            sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS            sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS           sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS           sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS              sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS            sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS             sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS             sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS            sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS             sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS            sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS            sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS           sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS               sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS              sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS              sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS             sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS             sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS                        sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS              sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS               sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS               sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS              sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS              sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS             sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS             sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS            sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS                        sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS                 sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS                        sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS                        sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS           sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS                        sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS               sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS                        sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS                 sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS                        sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS                        sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS           sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS              sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1                        sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1               sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS             sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS             sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1               sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1              sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS            sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS                        sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS               sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2                  sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2                 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2                  sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2                   sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1                  sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2                  sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1             sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS           sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS          sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS          sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS         sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS             sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS              sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS             sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS             sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS                sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS             sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS            sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS             sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS              sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS             sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS             sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS                sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS           sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS             sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS            sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS          sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS          sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS            sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS           sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS         sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS             sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS            sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS               sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS              sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS               sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS                        sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS               sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS               sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS          sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_BRB_IALL                    sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ                     sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX                    sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX                    sys_insn(1, 3, 7, 3, 5)
+#define OP_CPP_RCTX                    sys_insn(1, 3, 7, 3, 7)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_ENTP2        (BIT(60))
 #define SCTLR_ELx_DSSBS        (BIT(44))
index 55b50e1..b149cf9 100644 (file)
@@ -335,14 +335,77 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
  */
 #define MAX_TLBI_OPS   PTRS_PER_PTE
 
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op:        TLBI instruction that operates on a range (has 'r' prefix)
+ * @start:     The start address of the range
+ * @pages:     Range as the number of pages from 'start'
+ * @stride:    Flush granularity
+ * @asid:      The ASID of the task (0 for IPA instructions)
+ * @tlb_level: Translation Table level hint, if known
+ * @tlbi_user: If 'true', call an additional __tlbi_user()
+ *              (typically for user ASIDs). 'false' for IPA instructions
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ *    operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ *    by 'scale', so multiple range TLBI operations may be required.
+ *    Start from scale = 0, flush the corresponding number of pages
+ *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ *    until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride,                 \
+                               asid, tlb_level, tlbi_user)             \
+do {                                                                   \
+       /*                                                              \
+        * Work on private copies so that the caller's 'start' and      \
+        * 'pages' are left untouched (callers may still need the       \
+        * original range afterwards) and each is evaluated only once.  \
+        */                                                             \
+       typeof(start) __flush_start = (start);                          \
+       typeof(pages) __flush_pages = (pages);                          \
+       int num = 0;                                                    \
+       int scale = 0;                                                  \
+       unsigned long addr;                                             \
+                                                                       \
+       while (__flush_pages > 0) {                                     \
+               /* No range support, or odd count: flush one stride */  \
+               if (!system_supports_tlb_range() ||                     \
+                   __flush_pages % 2 == 1) {                           \
+                       addr = __TLBI_VADDR(__flush_start, asid);       \
+                       __tlbi_level(op, addr, tlb_level);              \
+                       if (tlbi_user)                                  \
+                               __tlbi_user_level(op, addr, tlb_level); \
+                       __flush_start += (stride);                      \
+                       __flush_pages -= (stride) >> PAGE_SHIFT;        \
+                       continue;                                       \
+               }                                                       \
+                                                                       \
+               /* Range op at the current scale, if one fits */        \
+               num = __TLBI_RANGE_NUM(__flush_pages, scale);           \
+               if (num >= 0) {                                         \
+                       addr = __TLBI_VADDR_RANGE(__flush_start, asid,  \
+                                                 scale, num, tlb_level); \
+                       __tlbi(r##op, addr);                            \
+                       if (tlbi_user)                                  \
+                               __tlbi_user(r##op, addr);               \
+                       __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+                       __flush_pages -= __TLBI_RANGE_PAGES(num, scale); \
+               }                                                       \
+               scale++;                                                \
+       }                                                               \
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+       __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end,
                                     unsigned long stride, bool last_level,
                                     int tlb_level)
 {
-       int num = 0;
-       int scale = 0;
-       unsigned long asid, addr, pages;
+       unsigned long asid, pages;
 
        start = round_down(start, stride);
        end = round_up(end, stride);
@@ -364,56 +427,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
        dsb(ishst);
        asid = ASID(vma->vm_mm);
 
-       /*
-        * When the CPU does not support TLB range operations, flush the TLB
-        * entries one by one at the granularity of 'stride'. If the TLB
-        * range ops are supported, then:
-        *
-        * 1. If 'pages' is odd, flush the first page through non-range
-        *    operations;
-        *
-        * 2. For remaining pages: the minimum range granularity is decided
-        *    by 'scale', so multiple range TLBI operations may be required.
-        *    Start from scale = 0, flush the corresponding number of pages
-        *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
-        *    until no pages left.
-        *
-        * Note that certain ranges can be represented by either num = 31 and
-        * scale or num = 0 and scale + 1. The loop below favours the latter
-        * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
-        */
-       while (pages > 0) {
-               if (!system_supports_tlb_range() ||
-                   pages % 2 == 1) {
-                       addr = __TLBI_VADDR(start, asid);
-                       if (last_level) {
-                               __tlbi_level(vale1is, addr, tlb_level);
-                               __tlbi_user_level(vale1is, addr, tlb_level);
-                       } else {
-                               __tlbi_level(vae1is, addr, tlb_level);
-                               __tlbi_user_level(vae1is, addr, tlb_level);
-                       }
-                       start += stride;
-                       pages -= stride >> PAGE_SHIFT;
-                       continue;
-               }
-
-               num = __TLBI_RANGE_NUM(pages, scale);
-               if (num >= 0) {
-                       addr = __TLBI_VADDR_RANGE(start, asid, scale,
-                                                 num, tlb_level);
-                       if (last_level) {
-                               __tlbi(rvale1is, addr);
-                               __tlbi_user(rvale1is, addr);
-                       } else {
-                               __tlbi(rvae1is, addr);
-                               __tlbi_user(rvae1is, addr);
-                       }
-                       start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
-                       pages -= __TLBI_RANGE_PAGES(num, scale);
-               }
-               scale++;
-       }
+       if (last_level)
+               __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+       else
+               __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+
        dsb(ish);
        mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
 }
index a5f533f..b018ae1 100644 (file)
@@ -2627,6 +2627,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .matches = has_cpuid_feature,
                ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
        },
+       {
+               .desc = "Fine Grained Traps",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_HAS_FGT,
+               .matches = has_cpuid_feature,
+               ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
+       },
 #ifdef CONFIG_ARM64_SME
        {
                .desc = "Scalable Matrix Extension",
index aee12c7..3addc09 100644 (file)
@@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
                if (!len)
                        return;
 
-               len = strscpy(buf, cmdline, ARRAY_SIZE(buf));
-               if (len == -E2BIG)
-                       len = ARRAY_SIZE(buf) - 1;
+               len = min(len, ARRAY_SIZE(buf) - 1);
+               memcpy(buf, cmdline, len);
+               buf[len] = '\0';
 
                if (strcmp(buf, "--") == 0)
                        return;
index f531da6..83c1e09 100644 (file)
@@ -25,7 +25,6 @@ menuconfig KVM
        select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select HAVE_KVM_CPU_RELAX_INTERCEPT
-       select HAVE_KVM_ARCH_TLB_FLUSH_ALL
        select KVM_MMIO
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select KVM_XFER_TO_GUEST_WORK
@@ -43,6 +42,7 @@ menuconfig KVM
        select SCHED_INFO
        select GUEST_PERF_EVENTS if PERF_EVENTS
        select INTERVAL_TREE
+       select XARRAY_MULTI
        help
          Support hosting virtualized guest machines.
 
index d1cb298..4866b3f 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
 #include <asm/kvm_pkvm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/sections.h>
@@ -365,7 +366,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 #endif
 
        /* Force users to call KVM_ARM_VCPU_INIT */
-       vcpu->arch.target = -1;
+       vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
        bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
        vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -462,7 +463,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                vcpu_ptrauth_disable(vcpu);
        kvm_arch_vcpu_load_debug_state_flags(vcpu);
 
-       if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
+       if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
                vcpu_set_on_unsupported_cpu(vcpu);
 }
 
@@ -574,7 +575,7 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
 
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 {
-       return vcpu->arch.target >= 0;
+       return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
 }
 
 /*
@@ -803,6 +804,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
                        kvm_pmu_handle_pmcr(vcpu,
                                            __vcpu_sys_reg(vcpu, PMCR_EL0));
 
+               if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
+                       kvm_vcpu_pmu_restore_guest(vcpu);
+
                if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
                        return kvm_vcpu_suspend(vcpu);
 
@@ -818,6 +822,9 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
        if (likely(!vcpu_mode_is_32bit(vcpu)))
                return false;
 
+       if (vcpu_has_nv(vcpu))
+               return true;
+
        return !kvm_supports_32bit_el0();
 }
 
@@ -1058,7 +1065,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                         * invalid. The VMM can try and fix it by issuing  a
                         * KVM_ARM_VCPU_INIT if it really wants to.
                         */
-                       vcpu->arch.target = -1;
+                       vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
                        ret = ARM_EXCEPTION_IL;
                }
 
@@ -1219,8 +1226,7 @@ static bool kvm_vcpu_init_changed(struct kvm_vcpu *vcpu,
 {
        unsigned long features = init->features[0];
 
-       return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
-                       vcpu->arch.target != init->target;
+       return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
 }
 
 static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
@@ -1236,20 +1242,18 @@ static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
            !bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
                goto out_unlock;
 
-       vcpu->arch.target = init->target;
        bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
 
        /* Now we know what it is, we can reset it. */
        ret = kvm_reset_vcpu(vcpu);
        if (ret) {
-               vcpu->arch.target = -1;
                bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
                goto out_unlock;
        }
 
        bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
        set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
-
+       vcpu_set_flag(vcpu, VCPU_INITIALIZED);
 out_unlock:
        mutex_unlock(&kvm->arch.config_lock);
        return ret;
@@ -1260,14 +1264,15 @@ static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
        int ret;
 
-       if (init->target != kvm_target_cpu())
+       if (init->target != KVM_ARM_TARGET_GENERIC_V8 &&
+           init->target != kvm_target_cpu())
                return -EINVAL;
 
        ret = kvm_vcpu_init_check_features(vcpu, init);
        if (ret)
                return ret;
 
-       if (vcpu->arch.target == -1)
+       if (!kvm_vcpu_initialized(vcpu))
                return __kvm_vcpu_set_target(vcpu, init);
 
        if (kvm_vcpu_init_changed(vcpu, init))
@@ -1532,12 +1537,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
                                        struct kvm_arm_device_addr *dev_addr)
 {
@@ -1595,9 +1594,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
                return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
        }
        case KVM_ARM_PREFERRED_TARGET: {
-               struct kvm_vcpu_init init;
-
-               kvm_vcpu_preferred_target(&init);
+               struct kvm_vcpu_init init = {
+                       .target = KVM_ARM_TARGET_GENERIC_V8,
+               };
 
                if (copy_to_user(argp, &init, sizeof(init)))
                        return -EFAULT;
@@ -2276,30 +2275,8 @@ static int __init init_hyp_mode(void)
        for_each_possible_cpu(cpu) {
                struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
                char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
-               unsigned long hyp_addr;
 
-               /*
-                * Allocate a contiguous HYP private VA range for the stack
-                * and guard page. The allocation is also aligned based on
-                * the order of its size.
-                */
-               err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
-               if (err) {
-                       kvm_err("Cannot allocate hyp stack guard page\n");
-                       goto out_err;
-               }
-
-               /*
-                * Since the stack grows downwards, map the stack to the page
-                * at the higher address and leave the lower guard page
-                * unbacked.
-                *
-                * Any valid stack address now has the PAGE_SHIFT bit as 1
-                * and addresses corresponding to the guard page have the
-                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-                */
-               err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
-                                           __pa(stack_page), PAGE_HYP);
+               err = create_hyp_stack(__pa(stack_page), &params->stack_hyp_va);
                if (err) {
                        kvm_err("Cannot map hyp stack\n");
                        goto out_err;
@@ -2312,8 +2289,6 @@ static int __init init_hyp_mode(void)
                 * has been mapped in the flexible private VA space.
                 */
                params->stack_pa = __pa(stack_page);
-
-               params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
        }
 
        for_each_possible_cpu(cpu) {
index b966620..9ced1bf 100644 (file)
 
 #include "trace.h"
 
+enum trap_behaviour {
+       BEHAVE_HANDLE_LOCALLY   = 0,
+       BEHAVE_FORWARD_READ     = BIT(0),
+       BEHAVE_FORWARD_WRITE    = BIT(1),
+       BEHAVE_FORWARD_ANY      = BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE,
+};
+
+struct trap_bits {
+       const enum vcpu_sysreg          index;
+       const enum trap_behaviour       behaviour;
+       const u64                       value;
+       const u64                       mask;
+};
+
+/* Coarse Grained Trap definitions */
+enum cgt_group_id {
+       /* Indicates no coarse trap control */
+       __RESERVED__,
+
+       /*
+        * The first batch of IDs denotes coarse trap controls that are
+        * used on their own instead of being part of a combination of
+        * trap controls.
+        */
+       CGT_HCR_TID1,
+       CGT_HCR_TID2,
+       CGT_HCR_TID3,
+       CGT_HCR_IMO,
+       CGT_HCR_FMO,
+       CGT_HCR_TIDCP,
+       CGT_HCR_TACR,
+       CGT_HCR_TSW,
+       CGT_HCR_TPC,
+       CGT_HCR_TPU,
+       CGT_HCR_TTLB,
+       CGT_HCR_TVM,
+       CGT_HCR_TDZ,
+       CGT_HCR_TRVM,
+       CGT_HCR_TLOR,
+       CGT_HCR_TERR,
+       CGT_HCR_APK,
+       CGT_HCR_NV,
+       CGT_HCR_NV_nNV2,
+       CGT_HCR_NV1_nNV2,
+       CGT_HCR_AT,
+       CGT_HCR_nFIEN,
+       CGT_HCR_TID4,
+       CGT_HCR_TICAB,
+       CGT_HCR_TOCU,
+       CGT_HCR_ENSCXT,
+       CGT_HCR_TTLBIS,
+       CGT_HCR_TTLBOS,
+
+       CGT_MDCR_TPMCR,
+       CGT_MDCR_TPM,
+       CGT_MDCR_TDE,
+       CGT_MDCR_TDA,
+       CGT_MDCR_TDOSA,
+       CGT_MDCR_TDRA,
+       CGT_MDCR_E2PB,
+       CGT_MDCR_TPMS,
+       CGT_MDCR_TTRF,
+       CGT_MDCR_E2TB,
+       CGT_MDCR_TDCC,
+
+       /*
+        * Anything after this point is a combination of coarse trap
+        * controls, which must all be evaluated to decide what to do.
+        */
+       __MULTIPLE_CONTROL_BITS__,
+       CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__,
+       CGT_HCR_TID2_TID4,
+       CGT_HCR_TTLB_TTLBIS,
+       CGT_HCR_TTLB_TTLBOS,
+       CGT_HCR_TVM_TRVM,
+       CGT_HCR_TPU_TICAB,
+       CGT_HCR_TPU_TOCU,
+       CGT_HCR_NV1_nNV2_ENSCXT,
+       CGT_MDCR_TPM_TPMCR,
+       CGT_MDCR_TDE_TDA,
+       CGT_MDCR_TDE_TDOSA,
+       CGT_MDCR_TDE_TDRA,
+       CGT_MDCR_TDCC_TDE_TDA,
+
+       /*
+        * Anything after this point requires a callback evaluating a
+        * complex trap condition. Ugly stuff.
+        */
+       __COMPLEX_CONDITIONS__,
+       CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__,
+       CGT_CNTHCTL_EL1PTEN,
+
+       /* Must be last */
+       __NR_CGT_GROUP_IDS__
+};
+
+static const struct trap_bits coarse_trap_bits[] = {
+       [CGT_HCR_TID1] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID1,
+               .mask           = HCR_TID1,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_TID2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID2,
+               .mask           = HCR_TID2,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TID3] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID3,
+               .mask           = HCR_TID3,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_IMO] = {
+               .index          = HCR_EL2,
+               .value          = HCR_IMO,
+               .mask           = HCR_IMO,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_FMO] = {
+               .index          = HCR_EL2,
+               .value          = HCR_FMO,
+               .mask           = HCR_FMO,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_TIDCP] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TIDCP,
+               .mask           = HCR_TIDCP,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TACR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TACR,
+               .mask           = HCR_TACR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TSW] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TSW,
+               .mask           = HCR_TSW,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TPC] = { /* Also called TCPC when FEAT_DPB is implemented */
+               .index          = HCR_EL2,
+               .value          = HCR_TPC,
+               .mask           = HCR_TPC,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TPU] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TPU,
+               .mask           = HCR_TPU,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLB] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLB,
+               .mask           = HCR_TTLB,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TVM] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TVM,
+               .mask           = HCR_TVM,
+               .behaviour      = BEHAVE_FORWARD_WRITE,
+       },
+       [CGT_HCR_TDZ] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TDZ,
+               .mask           = HCR_TDZ,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TRVM] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TRVM,
+               .mask           = HCR_TRVM,
+               .behaviour      = BEHAVE_FORWARD_READ,
+       },
+       [CGT_HCR_TLOR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TLOR,
+               .mask           = HCR_TLOR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TERR] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TERR,
+               .mask           = HCR_TERR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_APK] = {
+               .index          = HCR_EL2,
+               .value          = 0,
+               .mask           = HCR_APK,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV,
+               .mask           = HCR_NV,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV_nNV2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV,
+               .mask           = HCR_NV | HCR_NV2,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_NV1_nNV2] = {
+               .index          = HCR_EL2,
+               .value          = HCR_NV | HCR_NV1,
+               .mask           = HCR_NV | HCR_NV1 | HCR_NV2,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_AT] = {
+               .index          = HCR_EL2,
+               .value          = HCR_AT,
+               .mask           = HCR_AT,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_nFIEN] = {
+               .index          = HCR_EL2,
+               .value          = 0,
+               .mask           = HCR_FIEN,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TID4] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TID4,
+               .mask           = HCR_TID4,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TICAB] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TICAB,
+               .mask           = HCR_TICAB,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TOCU] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TOCU,
+               .mask           = HCR_TOCU,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_ENSCXT] = {
+               .index          = HCR_EL2,
+               .value          = 0,
+               .mask           = HCR_ENSCXT,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLBIS] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLBIS,
+               .mask           = HCR_TTLBIS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_HCR_TTLBOS] = {
+               .index          = HCR_EL2,
+               .value          = HCR_TTLBOS,
+               .mask           = HCR_TTLBOS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPMCR] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPMCR,
+               .mask           = MDCR_EL2_TPMCR,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPM] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPM,
+               .mask           = MDCR_EL2_TPM,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDE] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDE,
+               .mask           = MDCR_EL2_TDE,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDA,
+               .mask           = MDCR_EL2_TDA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDOSA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDOSA,
+               .mask           = MDCR_EL2_TDOSA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDRA] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDRA,
+               .mask           = MDCR_EL2_TDRA,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_E2PB] = {
+               .index          = MDCR_EL2,
+               .value          = 0,
+               .mask           = BIT(MDCR_EL2_E2PB_SHIFT),
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TPMS] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TPMS,
+               .mask           = MDCR_EL2_TPMS,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TTRF] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TTRF,
+               .mask           = MDCR_EL2_TTRF,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_E2TB] = {
+               .index          = MDCR_EL2,
+               .value          = 0,
+               .mask           = BIT(MDCR_EL2_E2TB_SHIFT),
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+       [CGT_MDCR_TDCC] = {
+               .index          = MDCR_EL2,
+               .value          = MDCR_EL2_TDCC,
+               .mask           = MDCR_EL2_TDCC,
+               .behaviour      = BEHAVE_FORWARD_ANY,
+       },
+};
+
+#define MCB(id, ...)                                           \
+       [id - __MULTIPLE_CONTROL_BITS__]        =               \
+               (const enum cgt_group_id[]){                    \
+               __VA_ARGS__, __RESERVED__                       \
+               }
+
+static const enum cgt_group_id *coarse_control_combo[] = {
+       MCB(CGT_HCR_IMO_FMO,            CGT_HCR_IMO, CGT_HCR_FMO),
+       MCB(CGT_HCR_TID2_TID4,          CGT_HCR_TID2, CGT_HCR_TID4),
+       MCB(CGT_HCR_TTLB_TTLBIS,        CGT_HCR_TTLB, CGT_HCR_TTLBIS),
+       MCB(CGT_HCR_TTLB_TTLBOS,        CGT_HCR_TTLB, CGT_HCR_TTLBOS),
+       MCB(CGT_HCR_TVM_TRVM,           CGT_HCR_TVM, CGT_HCR_TRVM),
+       MCB(CGT_HCR_TPU_TICAB,          CGT_HCR_TPU, CGT_HCR_TICAB),
+       MCB(CGT_HCR_TPU_TOCU,           CGT_HCR_TPU, CGT_HCR_TOCU),
+       MCB(CGT_HCR_NV1_nNV2_ENSCXT,    CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
+       MCB(CGT_MDCR_TPM_TPMCR,         CGT_MDCR_TPM, CGT_MDCR_TPMCR),
+       MCB(CGT_MDCR_TDE_TDA,           CGT_MDCR_TDE, CGT_MDCR_TDA),
+       MCB(CGT_MDCR_TDE_TDOSA,         CGT_MDCR_TDE, CGT_MDCR_TDOSA),
+       MCB(CGT_MDCR_TDE_TDRA,          CGT_MDCR_TDE, CGT_MDCR_TDRA),
+       MCB(CGT_MDCR_TDCC_TDE_TDA,      CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA),
+};
+
+typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);
+
+/*
+ * Warning, maximum confusion ahead.
+ *
+ * When E2H=0, CNTHCTL_EL2[1:0] are defined as EL1PCEN:EL1PCTEN
+ * When E2H=1, CNTHCTL_EL2[11:10] are defined as EL1PTEN:EL1PCTEN
+ *
+ * Note the single letter difference? Yet, the bits have the same
+ * function despite a different layout and a different name.
+ *
+ * We don't try to reconcile this mess. We just use the E2H=0 bits
+ * to generate something that is in the E2H=1 format, and live with
+ * it. You're welcome.
+ */
+static u64 get_sanitized_cnthctl(struct kvm_vcpu *vcpu)
+{
+       u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+       if (!vcpu_el2_e2h_is_set(vcpu))
+               val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
+
+       return val & ((CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN) << 10);
+}
+
+static enum trap_behaviour check_cnthctl_el1pcten(struct kvm_vcpu *vcpu)
+{
+       if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCTEN << 10))
+               return BEHAVE_HANDLE_LOCALLY;
+
+       return BEHAVE_FORWARD_ANY;
+}
+
+static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)
+{
+       if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCEN << 10))
+               return BEHAVE_HANDLE_LOCALLY;
+
+       return BEHAVE_FORWARD_ANY;
+}
+
+#define CCC(id, fn)                            \
+       [id - __COMPLEX_CONDITIONS__] = fn
+
+static const complex_condition_check ccc[] = {
+       CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten),
+       CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten),
+};
+
+/*
+ * Bit assignment for the trap controls. We use a 64bit word with the
+ * following layout for each trapped sysreg:
+ *
+ * [9:0]       enum cgt_group_id (10 bits)
+ * [13:10]     enum fgt_group_id (4 bits)
+ * [19:14]     bit number in the FGT register (6 bits)
+ * [20]                trap polarity (1 bit)
+ * [25:21]     FG filter (5 bits)
+ * [62:26]     Unused (37 bits)
+ * [63]                RES0 - Must be zero, as lost on insertion in the xarray
+ */
+#define TC_CGT_BITS    10
+#define TC_FGT_BITS    4
+#define TC_FGF_BITS    5
+
+union trap_config {
+       u64     val;
+       struct {
+               unsigned long   cgt:TC_CGT_BITS; /* Coarse Grained Trap id */
+               unsigned long   fgt:TC_FGT_BITS; /* Fine Grained Trap id */
+               unsigned long   bit:6;           /* Bit number */
+               unsigned long   pol:1;           /* Polarity */
+               unsigned long   fgf:TC_FGF_BITS; /* Fine Grained Filter */
+               unsigned long   unused:37;       /* Unused, should be zero */
+               unsigned long   mbz:1;           /* Must Be Zero */
+       };
+};
+
+struct encoding_to_trap_config {
+       const u32                       encoding;
+       const u32                       end;
+       const union trap_config         tc;
+       const unsigned int              line;
+};
+
+#define SR_RANGE_TRAP(sr_start, sr_end, trap_id)                       \
+       {                                                               \
+               .encoding       = sr_start,                             \
+               .end            = sr_end,                               \
+               .tc             = {                                     \
+                       .cgt            = trap_id,                      \
+               },                                                      \
+               .line = __LINE__,                                       \
+       }
+
+#define SR_TRAP(sr, trap_id)           SR_RANGE_TRAP(sr, sr, trap_id)
+
+/*
+ * Map encoding to trap bits for exceptions reported with EC=0x18.
+ * These must only be evaluated when running a nested hypervisor while
+ * the current context is not a hypervisor context. When the
+ * trapped access matches one of the trap controls, the exception is
+ * re-injected into the nested hypervisor.
+ */
+static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
+       SR_TRAP(SYS_REVIDR_EL1,         CGT_HCR_TID1),
+       SR_TRAP(SYS_AIDR_EL1,           CGT_HCR_TID1),
+       SR_TRAP(SYS_SMIDR_EL1,          CGT_HCR_TID1),
+       SR_TRAP(SYS_CTR_EL0,            CGT_HCR_TID2),
+       SR_TRAP(SYS_CCSIDR_EL1,         CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CCSIDR2_EL1,        CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CLIDR_EL1,          CGT_HCR_TID2_TID4),
+       SR_TRAP(SYS_CSSELR_EL1,         CGT_HCR_TID2_TID4),
+       SR_RANGE_TRAP(SYS_ID_PFR0_EL1,
+                     sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
+       SR_TRAP(SYS_ICC_SGI0R_EL1,      CGT_HCR_IMO_FMO),
+       SR_TRAP(SYS_ICC_ASGI1R_EL1,     CGT_HCR_IMO_FMO),
+       SR_TRAP(SYS_ICC_SGI1R_EL1,      CGT_HCR_IMO_FMO),
+       SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0),
+                     sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0),
+                     sys_reg(3, 1, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 2, 11, 0, 0),
+                     sys_reg(3, 2, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 3, 11, 0, 0),
+                     sys_reg(3, 3, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 4, 11, 0, 0),
+                     sys_reg(3, 4, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 5, 11, 0, 0),
+                     sys_reg(3, 5, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 6, 11, 0, 0),
+                     sys_reg(3, 6, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 7, 11, 0, 0),
+                     sys_reg(3, 7, 11, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 0, 15, 0, 0),
+                     sys_reg(3, 0, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 1, 15, 0, 0),
+                     sys_reg(3, 1, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 2, 15, 0, 0),
+                     sys_reg(3, 2, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 3, 15, 0, 0),
+                     sys_reg(3, 3, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 4, 15, 0, 0),
+                     sys_reg(3, 4, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 5, 15, 0, 0),
+                     sys_reg(3, 5, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 6, 15, 0, 0),
+                     sys_reg(3, 6, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_RANGE_TRAP(sys_reg(3, 7, 15, 0, 0),
+                     sys_reg(3, 7, 15, 15, 7), CGT_HCR_TIDCP),
+       SR_TRAP(SYS_ACTLR_EL1,          CGT_HCR_TACR),
+       SR_TRAP(SYS_DC_ISW,             CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CSW,             CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CISW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_IGSW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_IGDSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CGSW,            CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CGDSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIGSW,           CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIGDSW,          CGT_HCR_TSW),
+       SR_TRAP(SYS_DC_CIVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVAC,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVAP,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CVADP,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IVAC,            CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CIGVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CIGDVAC,         CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IGVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_IGDVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVAC,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVAC,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVAP,           CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVAP,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGVADP,          CGT_HCR_TPC),
+       SR_TRAP(SYS_DC_CGDVADP,         CGT_HCR_TPC),
+       SR_TRAP(SYS_IC_IVAU,            CGT_HCR_TPU_TOCU),
+       SR_TRAP(SYS_IC_IALLU,           CGT_HCR_TPU_TOCU),
+       SR_TRAP(SYS_IC_IALLUIS,         CGT_HCR_TPU_TICAB),
+       SR_TRAP(SYS_DC_CVAU,            CGT_HCR_TPU_TOCU),
+       SR_TRAP(OP_TLBI_RVAE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAAE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVALE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAALE1,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VMALLE1,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAE1,           CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_ASIDE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAAE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VALE1,          CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAALE1,         CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAAE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVALE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAALE1NXS,     CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VMALLE1NXS,     CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAE1NXS,        CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_ASIDE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAAE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VALE1NXS,       CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_VAALE1NXS,      CGT_HCR_TTLB),
+       SR_TRAP(OP_TLBI_RVAE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAAE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVALE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAALE1IS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1IS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAE1IS,         CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_ASIDE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAAE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VALE1IS,        CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAALE1IS,       CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAAE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVALE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_RVAALE1ISNXS,   CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1ISNXS,   CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAE1ISNXS,      CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_ASIDE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAAE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VALE1ISNXS,     CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VAALE1ISNXS,    CGT_HCR_TTLB_TTLBIS),
+       SR_TRAP(OP_TLBI_VMALLE1OS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAE1OS,         CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_ASIDE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAAE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VALE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAALE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAE1OS,        CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAAE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVALE1OS,       CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAALE1OS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VMALLE1OSNXS,   CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAE1OSNXS,      CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_ASIDE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAAE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VALE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_VAALE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAE1OSNXS,     CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAAE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVALE1OSNXS,    CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(OP_TLBI_RVAALE1OSNXS,   CGT_HCR_TTLB_TTLBOS),
+       SR_TRAP(SYS_SCTLR_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TTBR0_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TTBR1_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_TCR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_ESR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_FAR_EL1,            CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AFSR0_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AFSR1_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_MAIR_EL1,           CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_AMAIR_EL1,          CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_CONTEXTIDR_EL1,     CGT_HCR_TVM_TRVM),
+       SR_TRAP(SYS_DC_ZVA,             CGT_HCR_TDZ),
+       SR_TRAP(SYS_DC_GVA,             CGT_HCR_TDZ),
+       SR_TRAP(SYS_DC_GZVA,            CGT_HCR_TDZ),
+       SR_TRAP(SYS_LORSA_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_LOREA_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORN_EL1,           CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORC_EL1,           CGT_HCR_TLOR),
+       SR_TRAP(SYS_LORID_EL1,          CGT_HCR_TLOR),
+       SR_TRAP(SYS_ERRIDR_EL1,         CGT_HCR_TERR),
+       SR_TRAP(SYS_ERRSELR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXADDR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXCTLR_EL1,        CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXFR_EL1,          CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC0_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC1_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC2_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXMISC3_EL1,       CGT_HCR_TERR),
+       SR_TRAP(SYS_ERXSTATUS_EL1,      CGT_HCR_TERR),
+       SR_TRAP(SYS_APIAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIAKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIBKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APIBKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDAKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDBKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APDBKEYHI_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APGAKEYLO_EL1,      CGT_HCR_APK),
+       SR_TRAP(SYS_APGAKEYHI_EL1,      CGT_HCR_APK),
+       /* All _EL2 registers */
+       SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0),
+                     sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV),
+       /* Skip the SP_EL1 encoding... */
+       SR_TRAP(SYS_SPSR_EL2,           CGT_HCR_NV),
+       SR_TRAP(SYS_ELR_EL2,            CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1),
+                     sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0),
+                     sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV),
+       /* All _EL02, _EL12 registers */
+       SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
+                     sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
+       SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0),
+                     sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV),
+       SR_TRAP(OP_AT_S1E2R,            CGT_HCR_NV),
+       SR_TRAP(OP_AT_S1E2W,            CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E1R,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E1W,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E0R,           CGT_HCR_NV),
+       SR_TRAP(OP_AT_S12E0W,           CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2,           CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2,          CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1NXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1NXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1NXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1NXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2NXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2NXS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2NXS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1NXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1IS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1IS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1IS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1IS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2IS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2IS,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2IS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1IS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1ISNXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1ISNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1ISNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1ISNXS, CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2ISNXS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2ISNXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2ISNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1ISNXS,CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2OS,         CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1OS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1OS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1OS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1OS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1OS,    CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2OS,        CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2OS,       CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VAE2OSNXS,      CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_ALLE1OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VALE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_VMALLS12E1OSNXS,CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2E1OSNXS,   CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2E1OSNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_IPAS2LE1OSNXS,  CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RIPAS2LE1OSNXS, CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVAE2OSNXS,     CGT_HCR_NV),
+       SR_TRAP(OP_TLBI_RVALE2OSNXS,    CGT_HCR_NV),
+       SR_TRAP(OP_CPP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(OP_DVP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(OP_CFP_RCTX,            CGT_HCR_NV),
+       SR_TRAP(SYS_SP_EL1,             CGT_HCR_NV_nNV2),
+       SR_TRAP(SYS_VBAR_EL1,           CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_ELR_EL1,            CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_SPSR_EL1,           CGT_HCR_NV1_nNV2),
+       SR_TRAP(SYS_SCXTNUM_EL1,        CGT_HCR_NV1_nNV2_ENSCXT),
+       SR_TRAP(SYS_SCXTNUM_EL0,        CGT_HCR_ENSCXT),
+       SR_TRAP(OP_AT_S1E1R,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1W,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E0R,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E0W,            CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1RP,           CGT_HCR_AT),
+       SR_TRAP(OP_AT_S1E1WP,           CGT_HCR_AT),
+       SR_TRAP(SYS_ERXPFGF_EL1,        CGT_HCR_nFIEN),
+       SR_TRAP(SYS_ERXPFGCTL_EL1,      CGT_HCR_nFIEN),
+       SR_TRAP(SYS_ERXPFGCDN_EL1,      CGT_HCR_nFIEN),
+       SR_TRAP(SYS_PMCR_EL0,           CGT_MDCR_TPM_TPMCR),
+       SR_TRAP(SYS_PMCNTENSET_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCNTENCLR_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMOVSSET_EL0,       CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMOVSCLR_EL0,       CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCEID0_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCEID1_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMXEVTYPER_EL0,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMSWINC_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMSELR_EL0,         CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMXEVCNTR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCCNTR_EL0,        CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMUSERENR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMINTENSET_EL1,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMINTENCLR_EL1,     CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMMIR_EL1,          CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(0),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(1),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(2),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(3),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(4),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(5),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(6),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(7),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(8),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(9),   CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(10),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(11),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(12),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(13),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(14),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(15),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(16),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(17),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(18),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(19),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(20),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(21),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(22),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(23),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(24),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(25),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(26),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(27),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(28),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(29),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVCNTRn_EL0(30),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(0),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(1),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(2),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(3),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(4),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(5),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(6),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(7),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(8),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(9),  CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(10), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(11), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(12), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(13), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(14), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(15), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(16), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(17), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(18), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(19), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(20), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(21), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(22), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(23), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(24), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(25), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(26), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(27), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(28), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(29), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMEVTYPERn_EL0(30), CGT_MDCR_TPM),
+       SR_TRAP(SYS_PMCCFILTR_EL0,      CGT_MDCR_TPM),
+       SR_TRAP(SYS_MDCCSR_EL0,         CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_MDCCINT_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_OSDTRRX_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_OSDTRTX_EL1,        CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_DBGDTR_EL0,         CGT_MDCR_TDCC_TDE_TDA),
+       /*
+        * Also covers DBGDTRRX_EL0, which has the same encoding as
+        * SYS_DBGDTRTX_EL0...
+        */
+       SR_TRAP(SYS_DBGDTRTX_EL0,       CGT_MDCR_TDCC_TDE_TDA),
+       SR_TRAP(SYS_MDSCR_EL1,          CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_OSECCR_EL1,         CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBVRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGBCRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWVRn_EL1(15),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(0),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(1),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(2),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(3),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(4),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(5),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(6),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(7),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(8),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(9),     CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(10),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(11),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(12),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(13),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGWCRn_EL1(14),    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGCLAIMSET_EL1,    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGCLAIMCLR_EL1,    CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_DBGAUTHSTATUS_EL1,  CGT_MDCR_TDE_TDA),
+       SR_TRAP(SYS_OSLAR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_OSLSR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_OSDLR_EL1,          CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_DBGPRCR_EL1,        CGT_MDCR_TDE_TDOSA),
+       SR_TRAP(SYS_MDRAR_EL1,          CGT_MDCR_TDE_TDRA),
+       SR_TRAP(SYS_PMBLIMITR_EL1,      CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMBPTR_EL1,         CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMBSR_EL1,          CGT_MDCR_E2PB),
+       SR_TRAP(SYS_PMSCR_EL1,          CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSEVFR_EL1,        CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSFCR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSICR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSIDR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSIRR_EL1,         CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSLATFR_EL1,       CGT_MDCR_TPMS),
+       SR_TRAP(SYS_PMSNEVFR_EL1,       CGT_MDCR_TPMS),
+       SR_TRAP(SYS_TRFCR_EL1,          CGT_MDCR_TTRF),
+       SR_TRAP(SYS_TRBBASER_EL1,       CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBLIMITR_EL1,      CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBMAR_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBPTR_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBSR_EL1,          CGT_MDCR_E2TB),
+       SR_TRAP(SYS_TRBTRG_EL1,         CGT_MDCR_E2TB),
+       SR_TRAP(SYS_CNTP_TVAL_EL0,      CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTP_CVAL_EL0,      CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTP_CTL_EL0,       CGT_CNTHCTL_EL1PTEN),
+       SR_TRAP(SYS_CNTPCT_EL0,         CGT_CNTHCTL_EL1PCTEN),
+       SR_TRAP(SYS_CNTPCTSS_EL0,       CGT_CNTHCTL_EL1PCTEN),
+};
+
+static DEFINE_XARRAY(sr_forward_xa);   /* File-local xarray; its users are not visible in this hunk -- presumably the sysreg trap-forwarding lookup (confirm). */
+
+enum fgt_group_id {            /* Fine-grained trap groups; SR_FGF() picks one by pasting g ## _GROUP into .tc.fgt */
+       __NO_FGT_GROUP__,       /* first enumerator (value 0); presumably "no fine-grained trap" -- confirm */
+       HFGxTR_GROUP,           /* used by the table entries listed under "HFGRTR_EL2, HFGWTR_EL2" */
+       HDFGRTR_GROUP,          /* used by the table entries listed under "HDFGRTR_EL2" */
+       HDFGWTR_GROUP,          /* presumably HDFGWTR_EL2; no entries using it are visible in this hunk -- confirm */
+       HFGITR_GROUP,           /* used by the table entries listed under "HFGITR_EL2" */
+
+       /* Must be last */
+       __NR_FGT_GROUP_IDS__    /* presumably the number of group ids (bound/sanity limit) -- confirm */
+};
+
+enum fg_filter_id {            /* Extra filter stored in .tc.fgf by SR_FGF(); SR_FGT() always passes __NO_FGF__ */
+       __NO_FGF__,             /* first enumerator (value 0): no additional filter */
+       HCRX_FGTnXS,            /* passed by the TLBI *NXS entries, which "must be checked against HCRX_EL2.FGTnXS" */
+
+       /* Must be last */
+       __NR_FG_FILTER_IDS__    /* presumably the number of filter ids -- confirm */
+};
+
+#define SR_FGF(sr, g, b, p, f) /* one encoding_to_fgt[] initializer with filter f */ \
+       {                                                       \
+               .encoding       = sr,   /* start of range */    \
+               .end            = sr,   /* same as start: exactly one encoding */ \
+               .tc             = {                             \
+                       .fgt = g ## _GROUP, /* enumerator of enum fgt_group_id */ \
+                       .bit = g ## _EL2_ ## b ## _SHIFT, /* expands to <g>_EL2_<b>_SHIFT */ \
+                       .pol = p, /* visible entries pass 0 for n-prefixed bits, 1 otherwise */ \
+                       .fgf = f, /* enumerator of enum fg_filter_id */ \
+               },                                              \
+               .line = __LINE__, /* line of the table entry; presumably for diagnostics -- confirm */ \
+       }
+
+#define SR_FGT(sr, g, b, p)    SR_FGF(sr, g, b, p, __NO_FGF__) /* SR_FGF() entry with no extra filter */
+
+static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
+       /* HFGRTR_EL2, HFGWTR_EL2 */
+       SR_FGT(SYS_TPIDR2_EL0,          HFGxTR, nTPIDR2_EL0, 0),
+       SR_FGT(SYS_SMPRI_EL1,           HFGxTR, nSMPRI_EL1, 0),
+       SR_FGT(SYS_ACCDATA_EL1,         HFGxTR, nACCDATA_EL1, 0),
+       SR_FGT(SYS_ERXADDR_EL1,         HFGxTR, ERXADDR_EL1, 1),
+       SR_FGT(SYS_ERXPFGCDN_EL1,       HFGxTR, ERXPFGCDN_EL1, 1),
+       SR_FGT(SYS_ERXPFGCTL_EL1,       HFGxTR, ERXPFGCTL_EL1, 1),
+       SR_FGT(SYS_ERXPFGF_EL1,         HFGxTR, ERXPFGF_EL1, 1),
+       SR_FGT(SYS_ERXMISC0_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC1_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC2_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXMISC3_EL1,        HFGxTR, ERXMISCn_EL1, 1),
+       SR_FGT(SYS_ERXSTATUS_EL1,       HFGxTR, ERXSTATUS_EL1, 1),
+       SR_FGT(SYS_ERXCTLR_EL1,         HFGxTR, ERXCTLR_EL1, 1),
+       SR_FGT(SYS_ERXFR_EL1,           HFGxTR, ERXFR_EL1, 1),
+       SR_FGT(SYS_ERRSELR_EL1,         HFGxTR, ERRSELR_EL1, 1),
+       SR_FGT(SYS_ERRIDR_EL1,          HFGxTR, ERRIDR_EL1, 1),
+       SR_FGT(SYS_ICC_IGRPEN0_EL1,     HFGxTR, ICC_IGRPENn_EL1, 1),
+       SR_FGT(SYS_ICC_IGRPEN1_EL1,     HFGxTR, ICC_IGRPENn_EL1, 1),
+       SR_FGT(SYS_VBAR_EL1,            HFGxTR, VBAR_EL1, 1),
+       SR_FGT(SYS_TTBR1_EL1,           HFGxTR, TTBR1_EL1, 1),
+       SR_FGT(SYS_TTBR0_EL1,           HFGxTR, TTBR0_EL1, 1),
+       SR_FGT(SYS_TPIDR_EL0,           HFGxTR, TPIDR_EL0, 1),
+       SR_FGT(SYS_TPIDRRO_EL0,         HFGxTR, TPIDRRO_EL0, 1),
+       SR_FGT(SYS_TPIDR_EL1,           HFGxTR, TPIDR_EL1, 1),
+       SR_FGT(SYS_TCR_EL1,             HFGxTR, TCR_EL1, 1),
+       SR_FGT(SYS_SCXTNUM_EL0,         HFGxTR, SCXTNUM_EL0, 1),
+       SR_FGT(SYS_SCXTNUM_EL1,         HFGxTR, SCXTNUM_EL1, 1),
+       SR_FGT(SYS_SCTLR_EL1,           HFGxTR, SCTLR_EL1, 1),
+       SR_FGT(SYS_REVIDR_EL1,          HFGxTR, REVIDR_EL1, 1),
+       SR_FGT(SYS_PAR_EL1,             HFGxTR, PAR_EL1, 1),
+       SR_FGT(SYS_MPIDR_EL1,           HFGxTR, MPIDR_EL1, 1),
+       SR_FGT(SYS_MIDR_EL1,            HFGxTR, MIDR_EL1, 1),
+       SR_FGT(SYS_MAIR_EL1,            HFGxTR, MAIR_EL1, 1),
+       SR_FGT(SYS_LORSA_EL1,           HFGxTR, LORSA_EL1, 1),
+       SR_FGT(SYS_LORN_EL1,            HFGxTR, LORN_EL1, 1),
+       SR_FGT(SYS_LORID_EL1,           HFGxTR, LORID_EL1, 1),
+       SR_FGT(SYS_LOREA_EL1,           HFGxTR, LOREA_EL1, 1),
+       SR_FGT(SYS_LORC_EL1,            HFGxTR, LORC_EL1, 1),
+       SR_FGT(SYS_ISR_EL1,             HFGxTR, ISR_EL1, 1),
+       SR_FGT(SYS_FAR_EL1,             HFGxTR, FAR_EL1, 1),
+       SR_FGT(SYS_ESR_EL1,             HFGxTR, ESR_EL1, 1),
+       SR_FGT(SYS_DCZID_EL0,           HFGxTR, DCZID_EL0, 1),
+       SR_FGT(SYS_CTR_EL0,             HFGxTR, CTR_EL0, 1),
+       SR_FGT(SYS_CSSELR_EL1,          HFGxTR, CSSELR_EL1, 1),
+       SR_FGT(SYS_CPACR_EL1,           HFGxTR, CPACR_EL1, 1),
+       SR_FGT(SYS_CONTEXTIDR_EL1,      HFGxTR, CONTEXTIDR_EL1, 1),
+       SR_FGT(SYS_CLIDR_EL1,           HFGxTR, CLIDR_EL1, 1),
+       SR_FGT(SYS_CCSIDR_EL1,          HFGxTR, CCSIDR_EL1, 1),
+       SR_FGT(SYS_APIBKEYLO_EL1,       HFGxTR, APIBKey, 1),
+       SR_FGT(SYS_APIBKEYHI_EL1,       HFGxTR, APIBKey, 1),
+       SR_FGT(SYS_APIAKEYLO_EL1,       HFGxTR, APIAKey, 1),
+       SR_FGT(SYS_APIAKEYHI_EL1,       HFGxTR, APIAKey, 1),
+       SR_FGT(SYS_APGAKEYLO_EL1,       HFGxTR, APGAKey, 1),
+       SR_FGT(SYS_APGAKEYHI_EL1,       HFGxTR, APGAKey, 1),
+       SR_FGT(SYS_APDBKEYLO_EL1,       HFGxTR, APDBKey, 1),
+       SR_FGT(SYS_APDBKEYHI_EL1,       HFGxTR, APDBKey, 1),
+       SR_FGT(SYS_APDAKEYLO_EL1,       HFGxTR, APDAKey, 1),
+       SR_FGT(SYS_APDAKEYHI_EL1,       HFGxTR, APDAKey, 1),
+       SR_FGT(SYS_AMAIR_EL1,           HFGxTR, AMAIR_EL1, 1),
+       SR_FGT(SYS_AIDR_EL1,            HFGxTR, AIDR_EL1, 1),
+       SR_FGT(SYS_AFSR1_EL1,           HFGxTR, AFSR1_EL1, 1),
+       SR_FGT(SYS_AFSR0_EL1,           HFGxTR, AFSR0_EL1, 1),
+       /* HFGITR_EL2 */
+       SR_FGT(OP_BRB_IALL,             HFGITR, nBRBIALL, 0),
+       SR_FGT(OP_BRB_INJ,              HFGITR, nBRBINJ, 0),
+       SR_FGT(SYS_DC_CVAC,             HFGITR, DCCVAC, 1),
+       SR_FGT(SYS_DC_CGVAC,            HFGITR, DCCVAC, 1),
+       SR_FGT(SYS_DC_CGDVAC,           HFGITR, DCCVAC, 1),
+       SR_FGT(OP_CPP_RCTX,             HFGITR, CPPRCTX, 1),
+       SR_FGT(OP_DVP_RCTX,             HFGITR, DVPRCTX, 1),
+       SR_FGT(OP_CFP_RCTX,             HFGITR, CFPRCTX, 1),
+       SR_FGT(OP_TLBI_VAALE1,          HFGITR, TLBIVAALE1, 1),
+       SR_FGT(OP_TLBI_VALE1,           HFGITR, TLBIVALE1, 1),
+       SR_FGT(OP_TLBI_VAAE1,           HFGITR, TLBIVAAE1, 1),
+       SR_FGT(OP_TLBI_ASIDE1,          HFGITR, TLBIASIDE1, 1),
+       SR_FGT(OP_TLBI_VAE1,            HFGITR, TLBIVAE1, 1),
+       SR_FGT(OP_TLBI_VMALLE1,         HFGITR, TLBIVMALLE1, 1),
+       SR_FGT(OP_TLBI_RVAALE1,         HFGITR, TLBIRVAALE1, 1),
+       SR_FGT(OP_TLBI_RVALE1,          HFGITR, TLBIRVALE1, 1),
+       SR_FGT(OP_TLBI_RVAAE1,          HFGITR, TLBIRVAAE1, 1),
+       SR_FGT(OP_TLBI_RVAE1,           HFGITR, TLBIRVAE1, 1),
+       SR_FGT(OP_TLBI_RVAALE1IS,       HFGITR, TLBIRVAALE1IS, 1),
+       SR_FGT(OP_TLBI_RVALE1IS,        HFGITR, TLBIRVALE1IS, 1),
+       SR_FGT(OP_TLBI_RVAAE1IS,        HFGITR, TLBIRVAAE1IS, 1),
+       SR_FGT(OP_TLBI_RVAE1IS,         HFGITR, TLBIRVAE1IS, 1),
+       SR_FGT(OP_TLBI_VAALE1IS,        HFGITR, TLBIVAALE1IS, 1),
+       SR_FGT(OP_TLBI_VALE1IS,         HFGITR, TLBIVALE1IS, 1),
+       SR_FGT(OP_TLBI_VAAE1IS,         HFGITR, TLBIVAAE1IS, 1),
+       SR_FGT(OP_TLBI_ASIDE1IS,        HFGITR, TLBIASIDE1IS, 1),
+       SR_FGT(OP_TLBI_VAE1IS,          HFGITR, TLBIVAE1IS, 1),
+       SR_FGT(OP_TLBI_VMALLE1IS,       HFGITR, TLBIVMALLE1IS, 1),
+       SR_FGT(OP_TLBI_RVAALE1OS,       HFGITR, TLBIRVAALE1OS, 1),
+       SR_FGT(OP_TLBI_RVALE1OS,        HFGITR, TLBIRVALE1OS, 1),
+       SR_FGT(OP_TLBI_RVAAE1OS,        HFGITR, TLBIRVAAE1OS, 1),
+       SR_FGT(OP_TLBI_RVAE1OS,         HFGITR, TLBIRVAE1OS, 1),
+       SR_FGT(OP_TLBI_VAALE1OS,        HFGITR, TLBIVAALE1OS, 1),
+       SR_FGT(OP_TLBI_VALE1OS,         HFGITR, TLBIVALE1OS, 1),
+       SR_FGT(OP_TLBI_VAAE1OS,         HFGITR, TLBIVAAE1OS, 1),
+       SR_FGT(OP_TLBI_ASIDE1OS,        HFGITR, TLBIASIDE1OS, 1),
+       SR_FGT(OP_TLBI_VAE1OS,          HFGITR, TLBIVAE1OS, 1),
+       SR_FGT(OP_TLBI_VMALLE1OS,       HFGITR, TLBIVMALLE1OS, 1),
+       /* nXS variants must be checked against HCRX_EL2.FGTnXS */
+       SR_FGF(OP_TLBI_VAALE1NXS,       HFGITR, TLBIVAALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1NXS,        HFGITR, TLBIVALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1NXS,        HFGITR, TLBIVAAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1NXS,       HFGITR, TLBIASIDE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1NXS,         HFGITR, TLBIVAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1NXS,      HFGITR, TLBIVMALLE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1NXS,      HFGITR, TLBIRVAALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1NXS,       HFGITR, TLBIRVALE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1NXS,       HFGITR, TLBIRVAAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1NXS,        HFGITR, TLBIRVAE1, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1ISNXS,    HFGITR, TLBIRVAALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1ISNXS,     HFGITR, TLBIRVALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1ISNXS,     HFGITR, TLBIRVAAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1ISNXS,      HFGITR, TLBIRVAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAALE1ISNXS,     HFGITR, TLBIVAALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1ISNXS,      HFGITR, TLBIVALE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1ISNXS,      HFGITR, TLBIVAAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1ISNXS,     HFGITR, TLBIASIDE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1ISNXS,       HFGITR, TLBIVAE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1ISNXS,    HFGITR, TLBIVMALLE1IS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAALE1OSNXS,    HFGITR, TLBIRVAALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVALE1OSNXS,     HFGITR, TLBIRVALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAAE1OSNXS,     HFGITR, TLBIRVAAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_RVAE1OSNXS,      HFGITR, TLBIRVAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAALE1OSNXS,     HFGITR, TLBIVAALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VALE1OSNXS,      HFGITR, TLBIVALE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAAE1OSNXS,      HFGITR, TLBIVAAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_ASIDE1OSNXS,     HFGITR, TLBIASIDE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VAE1OSNXS,       HFGITR, TLBIVAE1OS, 1, HCRX_FGTnXS),
+       SR_FGF(OP_TLBI_VMALLE1OSNXS,    HFGITR, TLBIVMALLE1OS, 1, HCRX_FGTnXS),
+       SR_FGT(OP_AT_S1E1WP,            HFGITR, ATS1E1WP, 1),
+       SR_FGT(OP_AT_S1E1RP,            HFGITR, ATS1E1RP, 1),
+       SR_FGT(OP_AT_S1E0W,             HFGITR, ATS1E0W, 1),
+       SR_FGT(OP_AT_S1E0R,             HFGITR, ATS1E0R, 1),
+       SR_FGT(OP_AT_S1E1W,             HFGITR, ATS1E1W, 1),
+       SR_FGT(OP_AT_S1E1R,             HFGITR, ATS1E1R, 1),
+       SR_FGT(SYS_DC_ZVA,              HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_GVA,              HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_GZVA,             HFGITR, DCZVA, 1),
+       SR_FGT(SYS_DC_CIVAC,            HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CIGVAC,           HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CIGDVAC,          HFGITR, DCCIVAC, 1),
+       SR_FGT(SYS_DC_CVADP,            HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CGVADP,           HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CGDVADP,          HFGITR, DCCVADP, 1),
+       SR_FGT(SYS_DC_CVAP,             HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CGVAP,            HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CGDVAP,           HFGITR, DCCVAP, 1),
+       SR_FGT(SYS_DC_CVAU,             HFGITR, DCCVAU, 1),
+       SR_FGT(SYS_DC_CISW,             HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CIGSW,            HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CIGDSW,           HFGITR, DCCISW, 1),
+       SR_FGT(SYS_DC_CSW,              HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_CGSW,             HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_CGDSW,            HFGITR, DCCSW, 1),
+       SR_FGT(SYS_DC_ISW,              HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IGSW,             HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IGDSW,            HFGITR, DCISW, 1),
+       SR_FGT(SYS_DC_IVAC,             HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_DC_IGVAC,            HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_DC_IGDVAC,           HFGITR, DCIVAC, 1),
+       SR_FGT(SYS_IC_IVAU,             HFGITR, ICIVAU, 1),
+       SR_FGT(SYS_IC_IALLU,            HFGITR, ICIALLU, 1),
+       SR_FGT(SYS_IC_IALLUIS,          HFGITR, ICIALLUIS, 1),
+       /* HDFGRTR_EL2 */
+       SR_FGT(SYS_PMBIDR_EL1,          HDFGRTR, PMBIDR_EL1, 1),
+       SR_FGT(SYS_PMSNEVFR_EL1,        HDFGRTR, nPMSNEVFR_EL1, 0),
+       SR_FGT(SYS_BRBINF_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINF_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBINFINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRC_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBSRCINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(0),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(1),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(2),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(3),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(4),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(5),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(6),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(7),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(8),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(9),       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(10),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(11),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(12),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(13),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(14),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(15),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(16),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(17),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(18),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(19),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(20),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(21),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(22),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(23),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(24),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(25),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(26),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(27),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(28),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(29),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(30),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGT_EL1(31),      HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTGTINJ_EL1,       HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBTS_EL1,           HDFGRTR, nBRBDATA, 0),
+       SR_FGT(SYS_BRBCR_EL1,           HDFGRTR, nBRBCTL, 0),
+       SR_FGT(SYS_BRBFCR_EL1,          HDFGRTR, nBRBCTL, 0),
+       SR_FGT(SYS_BRBIDR0_EL1,         HDFGRTR, nBRBIDR, 0),
+       SR_FGT(SYS_PMCEID0_EL0,         HDFGRTR, PMCEIDn_EL0, 1),
+       SR_FGT(SYS_PMCEID1_EL0,         HDFGRTR, PMCEIDn_EL0, 1),
+       SR_FGT(SYS_PMUSERENR_EL0,       HDFGRTR, PMUSERENR_EL0, 1),
+       SR_FGT(SYS_TRBTRG_EL1,          HDFGRTR, TRBTRG_EL1, 1),
+       SR_FGT(SYS_TRBSR_EL1,           HDFGRTR, TRBSR_EL1, 1),
+       SR_FGT(SYS_TRBPTR_EL1,          HDFGRTR, TRBPTR_EL1, 1),
+       SR_FGT(SYS_TRBMAR_EL1,          HDFGRTR, TRBMAR_EL1, 1),
+       SR_FGT(SYS_TRBLIMITR_EL1,       HDFGRTR, TRBLIMITR_EL1, 1),
+       SR_FGT(SYS_TRBIDR_EL1,          HDFGRTR, TRBIDR_EL1, 1),
+       SR_FGT(SYS_TRBBASER_EL1,        HDFGRTR, TRBBASER_EL1, 1),
+       SR_FGT(SYS_TRCVICTLR,           HDFGRTR, TRCVICTLR, 1),
+       SR_FGT(SYS_TRCSTATR,            HDFGRTR, TRCSTATR, 1),
+       SR_FGT(SYS_TRCSSCSR(0),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(1),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(2),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(3),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(4),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(5),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(6),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSSCSR(7),         HDFGRTR, TRCSSCSRn, 1),
+       SR_FGT(SYS_TRCSEQSTR,           HDFGRTR, TRCSEQSTR, 1),
+       SR_FGT(SYS_TRCPRGCTLR,          HDFGRTR, TRCPRGCTLR, 1),
+       SR_FGT(SYS_TRCOSLSR,            HDFGRTR, TRCOSLSR, 1),
+       SR_FGT(SYS_TRCIMSPEC(0),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(1),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(2),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(3),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(4),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(5),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(6),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCIMSPEC(7),        HDFGRTR, TRCIMSPECn, 1),
+       SR_FGT(SYS_TRCDEVARCH,          HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCDEVID,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR0,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR1,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR2,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR3,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR4,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR5,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR6,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR7,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR8,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR9,             HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR10,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR11,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR12,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCIDR13,            HDFGRTR, TRCID, 1),
+       SR_FGT(SYS_TRCCNTVR(0),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(1),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(2),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCNTVR(3),         HDFGRTR, TRCCNTVRn, 1),
+       SR_FGT(SYS_TRCCLAIMCLR,         HDFGRTR, TRCCLAIM, 1),
+       SR_FGT(SYS_TRCCLAIMSET,         HDFGRTR, TRCCLAIM, 1),
+       SR_FGT(SYS_TRCAUXCTLR,          HDFGRTR, TRCAUXCTLR, 1),
+       SR_FGT(SYS_TRCAUTHSTATUS,       HDFGRTR, TRCAUTHSTATUS, 1),
+       SR_FGT(SYS_TRCACATR(0),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(1),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(2),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(3),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(4),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(5),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(6),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(7),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(8),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(9),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(10),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(11),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(12),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(13),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(14),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACATR(15),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(0),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(1),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(2),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(3),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(4),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(5),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(6),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(7),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(8),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(9),          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(10),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(11),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(12),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(13),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(14),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCACVR(15),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCBBCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCCCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCCTLR0,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCCTLR1,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(0),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(1),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(3),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(4),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(5),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(6),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCIDCVR(7),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTCTLR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(0),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(1),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(2),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCNTRLDVR(3),      HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCCONFIGR,          HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEVENTCTL0R,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEVENTCTL1R,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(0),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(1),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(2),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCEXTINSELR(3),     HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCQCTLR,            HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(3),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(4),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(5),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(6),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(7),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(8),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(9),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(10),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(11),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(12),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(13),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(14),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(15),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(16),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(17),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(18),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(19),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(20),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(21),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(22),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(23),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(24),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(25),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(26),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(27),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(28),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(29),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(30),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSCTLR(31),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCRSR,              HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(0),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(1),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQEVR(2),        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSEQRSTEVR,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(0),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(1),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(2),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(3),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(4),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(5),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(6),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSCCR(7),         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(4),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(5),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(6),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSSPCICR(7),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSTALLCTLR,        HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCSYNCPR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCTRACEIDR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCTSCTLR,           HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVIIECTLR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVIPCSSCTLR,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVISSCTLR,         HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCCTLR0,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCCTLR1,       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(0),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(1),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(2),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(3),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(4),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(5),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(6),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_TRCVMIDCVR(7),       HDFGRTR, TRC, 1),
+       SR_FGT(SYS_PMSLATFR_EL1,        HDFGRTR, PMSLATFR_EL1, 1),
+       SR_FGT(SYS_PMSIRR_EL1,          HDFGRTR, PMSIRR_EL1, 1),
+       SR_FGT(SYS_PMSIDR_EL1,          HDFGRTR, PMSIDR_EL1, 1),
+       SR_FGT(SYS_PMSICR_EL1,          HDFGRTR, PMSICR_EL1, 1),
+       SR_FGT(SYS_PMSFCR_EL1,          HDFGRTR, PMSFCR_EL1, 1),
+       SR_FGT(SYS_PMSEVFR_EL1,         HDFGRTR, PMSEVFR_EL1, 1),
+       SR_FGT(SYS_PMSCR_EL1,           HDFGRTR, PMSCR_EL1, 1),
+       SR_FGT(SYS_PMBSR_EL1,           HDFGRTR, PMBSR_EL1, 1),
+       SR_FGT(SYS_PMBPTR_EL1,          HDFGRTR, PMBPTR_EL1, 1),
+       SR_FGT(SYS_PMBLIMITR_EL1,       HDFGRTR, PMBLIMITR_EL1, 1),
+       SR_FGT(SYS_PMMIR_EL1,           HDFGRTR, PMMIR_EL1, 1),
+       SR_FGT(SYS_PMSELR_EL0,          HDFGRTR, PMSELR_EL0, 1),
+       SR_FGT(SYS_PMOVSCLR_EL0,        HDFGRTR, PMOVS, 1),
+       SR_FGT(SYS_PMOVSSET_EL0,        HDFGRTR, PMOVS, 1),
+       SR_FGT(SYS_PMINTENCLR_EL1,      HDFGRTR, PMINTEN, 1),
+       SR_FGT(SYS_PMINTENSET_EL1,      HDFGRTR, PMINTEN, 1),
+       SR_FGT(SYS_PMCNTENCLR_EL0,      HDFGRTR, PMCNTEN, 1),
+       SR_FGT(SYS_PMCNTENSET_EL0,      HDFGRTR, PMCNTEN, 1),
+       SR_FGT(SYS_PMCCNTR_EL0,         HDFGRTR, PMCCNTR_EL0, 1),
+       SR_FGT(SYS_PMCCFILTR_EL0,       HDFGRTR, PMCCFILTR_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(0),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(1),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(2),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(3),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(4),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(5),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(6),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(7),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(8),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(9),   HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(10),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(11),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(12),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(13),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(14),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(15),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(16),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(17),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(18),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(19),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(20),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(21),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(22),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(23),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(24),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(25),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(26),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(27),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(28),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(29),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVTYPERn_EL0(30),  HDFGRTR, PMEVTYPERn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(0),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(1),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(2),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(3),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(4),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(5),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(6),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(7),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(8),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(9),    HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(10),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(11),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(12),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(13),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(14),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(15),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(16),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(17),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(18),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(19),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(20),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(21),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(22),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(23),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(24),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(25),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(26),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(27),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(28),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(29),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_PMEVCNTRn_EL0(30),   HDFGRTR, PMEVCNTRn_EL0, 1),
+       SR_FGT(SYS_OSDLR_EL1,           HDFGRTR, OSDLR_EL1, 1),
+       SR_FGT(SYS_OSECCR_EL1,          HDFGRTR, OSECCR_EL1, 1),
+       SR_FGT(SYS_OSLSR_EL1,           HDFGRTR, OSLSR_EL1, 1),
+       SR_FGT(SYS_DBGPRCR_EL1,         HDFGRTR, DBGPRCR_EL1, 1),
+       SR_FGT(SYS_DBGAUTHSTATUS_EL1,   HDFGRTR, DBGAUTHSTATUS_EL1, 1),
+       SR_FGT(SYS_DBGCLAIMSET_EL1,     HDFGRTR, DBGCLAIM, 1),
+       SR_FGT(SYS_DBGCLAIMCLR_EL1,     HDFGRTR, DBGCLAIM, 1),
+       SR_FGT(SYS_MDSCR_EL1,           HDFGRTR, MDSCR_EL1, 1),
+       /*
+        * The trap bits capture *64* debug registers per bit, but the
+        * ARM ARM only describes the encoding for the first 16, and
+        * we don't really support more than that anyway.
+        */
+       SR_FGT(SYS_DBGWVRn_EL1(0),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(1),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(2),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(3),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(4),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(5),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(6),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(7),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(8),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(9),      HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(10),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(11),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(12),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(13),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(14),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWVRn_EL1(15),     HDFGRTR, DBGWVRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(0),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(1),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(2),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(3),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(4),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(5),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(6),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(7),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(8),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(9),      HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(10),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(11),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(12),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(13),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(14),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGWCRn_EL1(15),     HDFGRTR, DBGWCRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(0),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(1),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(2),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(3),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(4),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(5),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(6),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(7),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(8),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(9),      HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(10),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(11),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(12),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(13),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(14),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBVRn_EL1(15),     HDFGRTR, DBGBVRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(0),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(1),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(2),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(3),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(4),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(5),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(6),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(7),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(8),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(9),      HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(10),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(11),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(12),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(13),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(14),     HDFGRTR, DBGBCRn_EL1, 1),
+       SR_FGT(SYS_DBGBCRn_EL1(15),     HDFGRTR, DBGBCRn_EL1, 1),
+       /*
+        * HDFGWTR_EL2
+        *
+        * Although HDFGRTR_EL2 and HDFGWTR_EL2 registers largely
+        * overlap in their bit assignment, there are a number of bits
+        * that are RES0 on one side, and an actual trap bit on the
+        * other.  The policy chosen here is to describe all the
+        * read-side mappings, and only the write-side mappings that
+        * differ from the read side, and the trap handler will pick
+        * the correct shadow register based on the access type.
+        */
+       SR_FGT(SYS_TRFCR_EL1,           HDFGWTR, TRFCR_EL1, 1),
+       SR_FGT(SYS_TRCOSLAR,            HDFGWTR, TRCOSLAR, 1),
+       SR_FGT(SYS_PMCR_EL0,            HDFGWTR, PMCR_EL0, 1),
+       SR_FGT(SYS_PMSWINC_EL0,         HDFGWTR, PMSWINC_EL0, 1),
+       SR_FGT(SYS_OSLAR_EL1,           HDFGWTR, OSLAR_EL1, 1),
+};
+
+/*
+ * Fetch the trap configuration stored in sr_forward_xa for the system
+ * register encoding @sysreg, unpacking the xarray value into the 'val'
+ * member of a trap_config.
+ */
+static union trap_config get_trap_config(u32 sysreg)
+{
+       union trap_config tc = {
+               .val = xa_to_value(xa_load(&sr_forward_xa, sysreg)),
+       };
+
+       return tc;
+}
+
+/*
+ * Log a problem found with a trap table entry.
+ *
+ * @tc:   the offending table entry
+ * @type: short description of what went wrong
+ * @err:  error code associated with the problem
+ *
+ * The message carries the line recorded in the entry, followed by the
+ * (Op0, Op1, CRn, CRm, Op2) decomposition of both ends of the encoding
+ * range it covers.
+ */
+static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc,
+                                      const char *type, int err)
+{
+       kvm_err("%s line %d encoding range (%d, %d, %d, %d, %d) - (%d, %d, %d, %d, %d) (err=%d)\n",
+               type, tc->line,
+               /* First encoding of the range */
+               sys_reg_Op0(tc->encoding),
+               sys_reg_Op1(tc->encoding),
+               sys_reg_CRn(tc->encoding),
+               sys_reg_CRm(tc->encoding),
+               sys_reg_Op2(tc->encoding),
+               /* Last encoding of the range */
+               sys_reg_Op0(tc->end),
+               sys_reg_Op1(tc->end),
+               sys_reg_CRn(tc->end),
+               sys_reg_CRm(tc->end),
+               sys_reg_Op2(tc->end),
+               err);
+}
+
+/*
+ * Populate sr_forward_xa from the static coarse (CGT) and fine (FGT)
+ * grained trap tables, then check that no multiple-control-bit
+ * combination refers to another combination.
+ *
+ * Problems are logged as they are found and the walk carries on, so that
+ * a single pass reports all of them. On any error, the xarray is
+ * destroyed before returning.
+ *
+ * Returns 0 on success, -EINVAL if a table is inconsistent, or the xarray
+ * error if an insertion failed (the last error recorded wins).
+ */
+int __init populate_nv_trap_config(void)
+{
+       int ret = 0;
+
+       /*
+        * A trap_config is stored as an xarray value, so it must be
+        * pointer-sized, and each ID space must fit in its bitfield.
+        */
+       BUILD_BUG_ON(sizeof(union trap_config) != sizeof(void *));
+       BUILD_BUG_ON(__NR_CGT_GROUP_IDS__ > BIT(TC_CGT_BITS));
+       BUILD_BUG_ON(__NR_FGT_GROUP_IDS__ > BIT(TC_FGT_BITS));
+       BUILD_BUG_ON(__NR_FG_FILTER_IDS__ > BIT(TC_FGF_BITS));
+
+       for (int i = 0; i < ARRAY_SIZE(encoding_to_cgt); i++) {
+               const struct encoding_to_trap_config *cgt = &encoding_to_cgt[i];
+               void *prev;
+
+               if (cgt->tc.val & BIT(63)) {
+                       kvm_err("CGT[%d] has MBZ bit set\n", i);
+                       ret = -EINVAL;
+               }
+
+               if (cgt->encoding != cgt->end) {
+                       /* Ranges are only checked for insertion errors */
+                       prev = xa_store_range(&sr_forward_xa,
+                                             cgt->encoding, cgt->end,
+                                             xa_mk_value(cgt->tc.val),
+                                             GFP_KERNEL);
+               } else {
+                       prev = xa_store(&sr_forward_xa, cgt->encoding,
+                                       xa_mk_value(cgt->tc.val), GFP_KERNEL);
+                       /* A previous, valid entry means a duplicate */
+                       if (prev && !xa_is_err(prev)) {
+                               ret = -EINVAL;
+                               print_nv_trap_error(cgt, "Duplicate CGT", ret);
+                       }
+               }
+
+               if (xa_is_err(prev)) {
+                       ret = xa_err(prev);
+                       print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+               }
+       }
+
+       /* ARRAY_SIZE() evaluates to a size_t, hence %zu */
+       kvm_info("nv: %zu coarse grained trap handlers\n",
+                ARRAY_SIZE(encoding_to_cgt));
+
+       /* Without FGT support, the fine grained entries are not inserted */
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               goto check_mcb;
+
+       for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) {
+               const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i];
+               union trap_config tc;
+               void *prev;
+
+               if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) {
+                       ret = -EINVAL;
+                       print_nv_trap_error(fgt, "Invalid FGT", ret);
+               }
+
+               /* Start from whatever is already recorded for this encoding */
+               tc = get_trap_config(fgt->encoding);
+
+               if (tc.fgt) {
+                       ret = -EINVAL;
+                       print_nv_trap_error(fgt, "Duplicate FGT", ret);
+               }
+
+               /* Merge the FGT description into the existing entry */
+               tc.val |= fgt->tc.val;
+               prev = xa_store(&sr_forward_xa, fgt->encoding,
+                               xa_mk_value(tc.val), GFP_KERNEL);
+
+               /* Don't let a failed insertion go unnoticed */
+               if (xa_is_err(prev)) {
+                       ret = xa_err(prev);
+                       print_nv_trap_error(fgt, "Failed FGT insertion", ret);
+               }
+       }
+
+       kvm_info("nv: %zu fine grained trap handlers\n",
+                ARRAY_SIZE(encoding_to_fgt));
+
+check_mcb:
+       /* A combination may only list plain control bits, not other combos */
+       for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) {
+               const enum cgt_group_id *cgids;
+
+               cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+
+               for (int i = 0; cgids[i] != __RESERVED__; i++) {
+                       if (cgids[i] >= __MULTIPLE_CONTROL_BITS__) {
+                               kvm_err("Recursive MCB %d/%d\n", id, cgids[i]);
+                               ret = -EINVAL;
+                       }
+               }
+       }
+
+       if (ret)
+               xa_destroy(&sr_forward_xa);
+
+       return ret;
+}
+
+/*
+ * Evaluate one coarse grained trap descriptor against the vcpu's copy of
+ * the register it names: if the masked register value equals the
+ * descriptor's value, the descriptor's behaviour is ORed into
+ * BEHAVE_HANDLE_LOCALLY, otherwise BEHAVE_HANDLE_LOCALLY is returned as is.
+ */
+static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
+                                        const struct trap_bits *tb)
+{
+       const u64 masked = __vcpu_sys_reg(vcpu, tb->index) & tb->mask;
+
+       if (masked != tb->value)
+               return BEHAVE_HANDLE_LOCALLY;
+
+       return BEHAVE_HANDLE_LOCALLY | tb->behaviour;
+}
+
+/*
+ * Accumulate into @b the trap behaviour selected by the coarse grained
+ * trap group @id for @vcpu, and return the result.
+ *
+ * Three ranges of @id are distinguished:
+ * - __RESERVED__ up to __MULTIPLE_CONTROL_BITS__ - 1: a single entry of
+ *   coarse_trap_bits[], evaluated by get_behaviour() (__RESERVED__ itself
+ *   contributes nothing);
+ * - __MULTIPLE_CONTROL_BITS__ up to __COMPLEX_CONDITIONS__ - 1: a
+ *   __RESERVED__-terminated list taken from coarse_control_combo[], each
+ *   member of which is evaluated by calling this function again;
+ * - anything else: the callback ccc[id - __COMPLEX_CONDITIONS__].
+ */
+static enum trap_behaviour __compute_trap_behaviour(struct kvm_vcpu *vcpu,
+                                                   const enum cgt_group_id id,
+                                                   enum trap_behaviour b)
+{
+       switch (id) {
+               /* Scoped to the switch; only assigned in the combo case below */
+               const enum cgt_group_id *cgids;
+
+       case __RESERVED__ ... __MULTIPLE_CONTROL_BITS__ - 1:
+               if (likely(id != __RESERVED__))
+                       b |= get_behaviour(vcpu, &coarse_trap_bits[id]);
+               break;
+       case __MULTIPLE_CONTROL_BITS__ ... __COMPLEX_CONDITIONS__ - 1:
+               /* Yes, this is recursive. Don't do anything stupid. */
+               cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+               for (int i = 0; cgids[i] != __RESERVED__; i++)
+                       b |= __compute_trap_behaviour(vcpu, cgids[i], b);
+               break;
+       default:
+               /* Skip the lookup altogether when ccc[] has no entries */
+               if (ARRAY_SIZE(ccc))
+                       b |= ccc[id -  __COMPLEX_CONDITIONS__](vcpu);
+               break;
+       }
+
+       return b;
+}
+
+/*
+ * Entry point of the coarse grained trap evaluation: start from
+ * BEHAVE_HANDLE_LOCALLY and let the CGT group recorded in @tc decide.
+ */
+static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu,
+                                                 const union trap_config tc)
+{
+       return __compute_trap_behaviour(vcpu, tc.cgt, BEHAVE_HANDLE_LOCALLY);
+}
+
+/* Return true when bit tc.bit of @val has the value given by tc.pol */
+static bool check_fgt_bit(u64 val, const union trap_config tc)
+{
+       const u64 bit_value = (val >> tc.bit) & 1;
+
+       return bit_value == tc.pol;
+}
+
+/*
+ * Evaluate to the vcpu's copy of @reg with the bits of __<reg>_RES0
+ * cleared. @reg is token-pasted, so it must be the bare register name
+ * for which a matching __<reg>_RES0 definition exists.
+ */
+#define sanitised_sys_reg(vcpu, reg)                   \
+       ({                                              \
+               u64 __val;                              \
+               __val = __vcpu_sys_reg(vcpu, reg);      \
+               __val &= ~__ ## reg ## _RES0;           \
+               (__val);                                \
+       })
+
+/*
+ * Decide whether the system register access described by the vcpu's ESR
+ * has to be forwarded to the guest hypervisor.
+ *
+ * Returns true if the exception was injected with
+ * kvm_inject_nested_sync(). Returns false when nothing was injected: the
+ * vcpu has no NV, it is in hyp context, the sysreg has no trap
+ * configuration, or neither the fine grained nor the coarse grained
+ * checks ask for forwarding.
+ */
+bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
+{
+       union trap_config tc;
+       enum trap_behaviour b;
+       bool is_read;
+       u32 sysreg;
+       u64 esr, val;
+
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return false;
+
+       esr = kvm_vcpu_get_esr(vcpu);
+       sysreg = esr_sys64_to_sysreg(esr);
+       is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+
+       tc = get_trap_config(sysreg);
+
+       /*
+        * A value of 0 for the whole entry means that we know nothing
+        * for this sysreg, and that it cannot be re-injected into the
+        * nested hypervisor. In this situation, let's cut it short.
+        *
+        * Note that ultimately, we could also make use of the xarray
+        * to store the index of the sysreg in the local descriptor
+        * array, avoiding another search... Hint, hint...
+        */
+       if (!tc.val)
+               return false;
+
+       /*
+        * Fetch the fine grained trap register matching the group and the
+        * direction of the access. val is only assigned in the cases that
+        * name a group, and is only read further down when tc.fgt is not
+        * __NO_FGT_GROUP__.
+        */
+       switch ((enum fgt_group_id)tc.fgt) {
+       case __NO_FGT_GROUP__:
+               break;
+
+       case HFGxTR_GROUP:
+               if (is_read)
+                       val = sanitised_sys_reg(vcpu, HFGRTR_EL2);
+               else
+                       val = sanitised_sys_reg(vcpu, HFGWTR_EL2);
+               break;
+
+       case HDFGRTR_GROUP:
+       case HDFGWTR_GROUP:
+               if (is_read)
+                       val = sanitised_sys_reg(vcpu, HDFGRTR_EL2);
+               else
+                       val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
+               break;
+
+       case HFGITR_GROUP:
+               val = sanitised_sys_reg(vcpu, HFGITR_EL2);
+               switch (tc.fgf) {
+                       u64 tmp;
+
+               case __NO_FGF__:
+                       break;
+
+               case HCRX_FGTnXS:
+                       /*
+                        * With HCRX_EL2.FGTnXS set in the vcpu's view, drop
+                        * the fine grained check for this access.
+                        */
+                       tmp = sanitised_sys_reg(vcpu, HCRX_EL2);
+                       if (tmp & HCRX_EL2_FGTnXS)
+                               tc.fgt = __NO_FGT_GROUP__;
+               }
+               break;
+
+       case __NR_FGT_GROUP_IDS__:
+               /* Something is really wrong, bail out */
+               WARN_ONCE(1, "__NR_FGT_GROUP_IDS__");
+               return false;
+       }
+
+       /* Fine grained trap first: a matching bit forwards unconditionally */
+       if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc))
+               goto inject;
+
+       /* Otherwise fall back to the coarse grained trap configuration */
+       b = compute_trap_behaviour(vcpu, tc);
+
+       if (((b & BEHAVE_FORWARD_READ) && is_read) ||
+           ((b & BEHAVE_FORWARD_WRITE) && !is_read))
+               goto inject;
+
+       return false;
+
+inject:
+       trace_kvm_forward_sysreg_trap(vcpu, sysreg, is_read);
+
+       kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+       return true;
+}
+
 static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
 {
        u64 mode = spsr & PSR_MODE_MASK;
index 20280a5..95f6945 100644 (file)
@@ -884,21 +884,6 @@ u32 __attribute_const__ kvm_target_cpu(void)
        return KVM_ARM_TARGET_GENERIC_V8;
 }
 
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
-{
-       u32 target = kvm_target_cpu();
-
-       memset(init, 0, sizeof(*init));
-
-       /*
-        * For now, we don't return any features.
-        * In future, we might use features to return target
-        * specific features available for the preferred
-        * target type.
-        */
-       init->target = (__u32)target;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
        return -EINVAL;
index 6dcd660..617ae6d 100644 (file)
@@ -222,7 +222,33 @@ static int kvm_handle_eret(struct kvm_vcpu *vcpu)
        if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
                return kvm_handle_ptrauth(vcpu);
 
-       kvm_emulate_nested_eret(vcpu);
+       /*
+        * If we got here, two possibilities:
+        *
+        * - the guest is in EL2, and we need to fully emulate ERET
+        *
+        * - the guest is in EL1, and we need to reinject the
+         *   exception into the L1 hypervisor.
+        *
+        * If KVM ever traps ERET for its own use, we'll have to
+        * revisit this.
+        */
+       if (is_hyp_ctxt(vcpu))
+               kvm_emulate_nested_eret(vcpu);
+       else
+               kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+       return 1;
+}
+
+/*
+ * Exit handler for ESR_ELx_EC_SVC64: re-inject the exception, with the
+ * vcpu's current ESR, through kvm_inject_nested_sync(). Always returns 1.
+ */
+static int handle_svc(struct kvm_vcpu *vcpu)
+{
+       /*
+        * So far, SVC traps only for NV via HFGITR_EL2. A SVC from a
+        * 32bit guest would be caught by vcpu_mode_is_bad_32bit(), so
+        * we should only have to deal with a 64 bit exception.
+        */
+       kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
        return 1;
 }
 
@@ -239,6 +265,7 @@ static exit_handle_fn arm_exit_handlers[] = {
        [ESR_ELx_EC_SMC32]      = handle_smc,
        [ESR_ELx_EC_HVC64]      = handle_hvc,
        [ESR_ELx_EC_SMC64]      = handle_smc,
+       [ESR_ELx_EC_SVC64]      = handle_svc,
        [ESR_ELx_EC_SYS64]      = kvm_handle_sys_reg,
        [ESR_ELx_EC_SVE]        = handle_sve,
        [ESR_ELx_EC_ERET]       = kvm_handle_eret,
index 34f222a..9cfe6bd 100644 (file)
@@ -70,20 +70,26 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
        }
 }
 
-static inline bool __hfgxtr_traps_required(void)
-{
-       if (cpus_have_final_cap(ARM64_SME))
-               return true;
-
-       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-               return true;
+/*
+ * Fold the vcpu's copy of @reg (with the __<reg>_RES0 bits cleared) into
+ * the caller's accumulators:
+ * - @set gains the bits of __<reg>_MASK that are set in the register;
+ * - @clr gains the bits of __<reg>_nMASK that are clear in the register.
+ *
+ * Both accumulators are only ever ORed into, so the caller is expected
+ * to initialise them. @reg is token-pasted and must be a bare register
+ * name with matching __<reg>_RES0, __<reg>_MASK and __<reg>_nMASK
+ * definitions.
+ */
+#define compute_clr_set(vcpu, reg, clr, set)                           \
+       do {                                                            \
+               u64 hfg;                                                \
+               hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0;  \
+               set |= hfg & __ ## reg ## _MASK;                        \
+               clr |= ~hfg & __ ## reg ## _nMASK;                      \
+       } while(0)
 
-       return false;
-}
 
-static inline void __activate_traps_hfgxtr(void)
+static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
+       struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
        u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+       u64 r_val, w_val;
+
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               return;
+
+       ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
+       ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
 
        if (cpus_have_final_cap(ARM64_SME)) {
                tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
@@ -98,26 +104,72 @@ static inline void __activate_traps_hfgxtr(void)
        if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
                w_set |= HFGxTR_EL2_TCR_EL1_MASK;
 
-       sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
-       sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+       if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+               compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
+               compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
+       }
+
+       /* The default is not to trap anything but ACCDATA_EL1 */
+       r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+       w_val |= w_set;
+       w_val &= ~w_clr;
+
+       write_sysreg_s(r_val, SYS_HFGRTR_EL2);
+       write_sysreg_s(w_val, SYS_HFGWTR_EL2);
+
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return;
+
+       ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
+
+       r_set = r_clr = 0;
+       compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
+       r_val = __HFGITR_EL2_nMASK;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       write_sysreg_s(r_val, SYS_HFGITR_EL2);
+
+       ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
+       ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
+
+       r_clr = r_set = w_clr = w_set = 0;
+
+       compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
+       compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
+
+       r_val = __HDFGRTR_EL2_nMASK;
+       r_val |= r_set;
+       r_val &= ~r_clr;
+
+       w_val = __HDFGWTR_EL2_nMASK;
+       w_val |= w_set;
+       w_val &= ~w_clr;
+
+       write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
+       write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
 }
 
-static inline void __deactivate_traps_hfgxtr(void)
+static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
-       u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+       struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
 
-       if (cpus_have_final_cap(ARM64_SME)) {
-               tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
+       if (!cpus_have_final_cap(ARM64_HAS_FGT))
+               return;
 
-               r_set |= tmp;
-               w_set |= tmp;
-       }
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
 
-       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-               w_clr |= HFGxTR_EL2_TCR_EL1_MASK;
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               return;
 
-       sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
-       sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
+       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
 }
 
 static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -145,8 +197,21 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
        vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 
-       if (__hfgxtr_traps_required())
-               __activate_traps_hfgxtr();
+       if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+               u64 hcrx = HCRX_GUEST_FLAGS;
+               if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+                       u64 clr = 0, set = 0;
+
+                       compute_clr_set(vcpu, HCRX_EL2, clr, set);
+
+                       hcrx |= set;
+                       hcrx &= ~clr;
+               }
+
+               write_sysreg_s(hcrx, SYS_HCRX_EL2);
+       }
+
+       __activate_traps_hfgxtr(vcpu);
 }
 
 static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
@@ -162,8 +227,10 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
                vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
        }
 
-       if (__hfgxtr_traps_required())
-               __deactivate_traps_hfgxtr();
+       if (cpus_have_final_cap(ARM64_HAS_HCX))
+               write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
+
+       __deactivate_traps_hfgxtr(vcpu);
 }
 
 static inline void ___activate_traps(struct kvm_vcpu *vcpu)
@@ -177,9 +244,6 @@ static inline void ___activate_traps(struct kvm_vcpu *vcpu)
 
        if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
                write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
-       if (cpus_have_final_cap(ARM64_HAS_HCX))
-               write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
@@ -194,9 +258,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
                vcpu->arch.hcr_el2 &= ~HCR_VSE;
                vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
        }
-
-       if (cpus_have_final_cap(ARM64_HAS_HCX))
-               write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
 }
 
 static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
index d5ec972..230e4f2 100644 (file)
@@ -26,6 +26,7 @@ int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot
 int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
                                  enum kvm_pgtable_prot prot,
                                  unsigned long *haddr);
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
 
 #endif /* __KVM_HYP_MM_H */
index a169c61..857d9bc 100644 (file)
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
        __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
 }
 
+/*
+ * Host hypercall handler for __kvm_tlb_flush_vmid_range(): the mmu
+ * pointer, start address and page count are declared with DECLARE_REG()
+ * from slots 1, 2 and 3 of @host_ctxt, and the mmu pointer is passed
+ * through kern_hyp_va() before the call.
+ */
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+       DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+       DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+       DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+       __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
        HANDLE_FUNC(__kvm_tlb_flush_vmid),
+       HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
        HANDLE_FUNC(__kvm_flush_cpu_context),
        HANDLE_FUNC(__kvm_timer_set_cntvoff),
        HANDLE_FUNC(__vgic_v3_read_vmcr),
index 318298e..65a7a18 100644 (file)
@@ -44,6 +44,27 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
        return err;
 }
 
+/*
+ * Move __io_map_base to the end of [start, start + PAGE_ALIGN(size)).
+ * The caller must hold pkvm_pgd_lock.
+ *
+ * Returns -EINVAL if @start is zero or lies below the current
+ * __io_map_base, -ENOMEM if the range would run past __hyp_vmemmap,
+ * and 0 on success.
+ */
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+       unsigned long end;
+
+       hyp_assert_lock_held(&pkvm_pgd_lock);
+
+       if (!start)
+               return -EINVAL;
+
+       if (start < __io_map_base)
+               return -EINVAL;
+
+       /* Reservations are always made in whole pages */
+       end = start + PAGE_ALIGN(size);
+
+       /* The private range must not run into the vmemmap */
+       if (end > __hyp_vmemmap)
+               return -ENOMEM;
+
+       __io_map_base = end;
+
+       return 0;
+}
+
 /**
  * pkvm_alloc_private_va_range - Allocates a private VA range.
  * @size:      The size of the VA range to reserve.
@@ -56,27 +77,16 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size,
  */
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
 {
-       unsigned long base, addr;
-       int ret = 0;
+       unsigned long addr;
+       int ret;
 
        hyp_spin_lock(&pkvm_pgd_lock);
-
-       /* Align the allocation based on the order of its size */
-       addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
-       /* The allocated size is always a multiple of PAGE_SIZE */
-       base = addr + PAGE_ALIGN(size);
-
-       /* Are we overflowing on the vmemmap ? */
-       if (!addr || base > __hyp_vmemmap)
-               ret = -ENOMEM;
-       else {
-               __io_map_base = base;
-               *haddr = addr;
-       }
-
+       addr = __io_map_base;
+       ret = __pkvm_alloc_private_va_range(addr, size);
        hyp_spin_unlock(&pkvm_pgd_lock);
 
+       *haddr = addr;
+
        return ret;
 }
 
@@ -340,6 +350,45 @@ int hyp_create_idmap(u32 hyp_va_bits)
        return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
 }
 
+/*
+ * Reserve a two-page private VA range aligned on its own size and map
+ * @phys at the upper page, leaving the lower page unmapped as a guard.
+ *
+ * @phys:  physical address backing the stack page.
+ * @haddr: written with the end of the range (addr + 2 * PAGE_SIZE),
+ *         whether or not the call succeeds.
+ *
+ * Returns 0 on success, or the error from the VA reservation or from
+ * kvm_pgtable_hyp_map(). If the mapping fails, __io_map_base is restored
+ * to the value it had on entry.
+ */
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+       unsigned long addr, prev_base;
+       size_t size;
+       int ret;
+
+       hyp_spin_lock(&pkvm_pgd_lock);
+
+       /* Remembered so that a failed mapping can undo the reservation */
+       prev_base = __io_map_base;
+       /*
+        * Efficient stack verification using the PAGE_SHIFT bit implies
+        * an alignment of our allocation on the order of the size.
+        */
+       size = PAGE_SIZE * 2;
+       addr = ALIGN(__io_map_base, size);
+
+       ret = __pkvm_alloc_private_va_range(addr, size);
+       if (!ret) {
+               /*
+                * Since the stack grows downwards, map the stack to the page
+                * at the higher address and leave the lower guard page
+                * unbacked.
+                *
+                * Any valid stack address now has the PAGE_SHIFT bit as 1
+                * and addresses corresponding to the guard page have the
+                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+                */
+               ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+                                         PAGE_SIZE, phys, PAGE_HYP);
+               if (ret)
+                       __io_map_base = prev_base;
+       }
+       hyp_spin_unlock(&pkvm_pgd_lock);
+
+       *haddr = addr + size;
+
+       return ret;
+}
+
 static void *admit_host_page(void *arg)
 {
        struct kvm_hyp_memcache *host_mc = arg;
index bb98630..0d5e0a8 100644 (file)
@@ -113,7 +113,6 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 
        for (i = 0; i < hyp_nr_cpus; i++) {
                struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
-               unsigned long hyp_addr;
 
                start = (void *)kern_hyp_va(per_cpu_base[i]);
                end = start + PAGE_ALIGN(hyp_percpu_size);
@@ -121,33 +120,9 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
                if (ret)
                        return ret;
 
-               /*
-                * Allocate a contiguous HYP private VA range for the stack
-                * and guard page. The allocation is also aligned based on
-                * the order of its size.
-                */
-               ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+               ret = pkvm_create_stack(params->stack_pa, &params->stack_hyp_va);
                if (ret)
                        return ret;
-
-               /*
-                * Since the stack grows downwards, map the stack to the page
-                * at the higher address and leave the lower guard page
-                * unbacked.
-                *
-                * Any valid stack address now has the PAGE_SHIFT bit as 1
-                * and addresses corresponding to the guard page have the
-                * PAGE_SHIFT bit as 0 - this is used for overflow detection.
-                */
-               hyp_spin_lock(&pkvm_pgd_lock);
-               ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
-                                       PAGE_SIZE, params->stack_pa, PAGE_HYP);
-               hyp_spin_unlock(&pkvm_pgd_lock);
-               if (ret)
-                       return ret;
-
-               /* Update stack_hyp_va to end of the stack's private VA range */
-               params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
        }
 
        /*
index e89a231..c353a06 100644 (file)
@@ -236,7 +236,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
                 * KVM_ARM_VCPU_INIT, however, this is likely not possible for
                 * protected VMs.
                 */
-               vcpu->arch.target = -1;
+               vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
                *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
                *exit_code |= ARM_EXCEPTION_IL;
        }
index b9991bb..1b26571 100644 (file)
@@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+/*
+ * Range-based stage-2 TLB invalidation for the VMID of @mmu.
+ *
+ * @start is rounded down to PAGE_SIZE, then @pages pages are handed to
+ * __flush_s2_tlb_range_op() with the ipas2e1is operation and a PAGE_SIZE
+ * stride, followed by a vmalle1is invalidation. All of this runs between
+ * __tlb_switch_to_guest() and __tlb_switch_to_host().
+ *
+ * NOTE(review): the meaning of the trailing 0 passed to
+ * __flush_s2_tlb_range_op() and of the 'false' passed to
+ * __tlb_switch_to_guest() is not visible from here - confirm against
+ * their definitions.
+ */
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t start, unsigned long pages)
+{
+       struct tlb_inv_context cxt;
+       unsigned long stride;
+
+       /*
+        * Since the range of addresses may not be mapped at
+        * the same level, assume the worst case as PAGE_SIZE
+        */
+       stride = PAGE_SIZE;
+       start = round_down(start, stride);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt, false);
+
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+       dsb(ish);
+       __tlbi(vmalle1is);
+       dsb(ish);
+       isb();
+
+       /* See the comment in __kvm_tlb_flush_vmid_ipa() */
+       if (icache_is_vpipt())
+               icache_inval_all_pou();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
index f7a93ef..f155b8c 100644 (file)
@@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
        return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
 }
 
+/*
+ * Invalidate the stage-2 TLB entries of @mmu for @size bytes starting at
+ * @addr. Without range TLBI support the whole VMID is flushed instead;
+ * otherwise the range is handed to the hypervisor in chunks of at most
+ * MAX_TLBI_RANGE_PAGES pages. @size is converted to a page count by
+ * shifting, so only whole pages are covered.
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t addr, size_t size)
+{
+       unsigned long remaining, chunk;
+
+       if (!system_supports_tlb_range()) {
+               kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+               return;
+       }
+
+       for (remaining = size >> PAGE_SHIFT; remaining; remaining -= chunk) {
+               chunk = min(remaining, MAX_TLBI_RANGE_PAGES);
+               kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, chunk);
+
+               addr += chunk << PAGE_SHIFT;
+       }
+}
+
 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
 
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
                 * evicted pte value (if any).
                 */
                if (kvm_pte_table(ctx->old, ctx->level))
-                       kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+                       kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+                                               kvm_granule_size(ctx->level));
                else if (kvm_pte_valid(ctx->old))
                        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
                                     ctx->addr, ctx->level);
@@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
        smp_store_release(ctx->ptep, new);
 }
 
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
-                          struct kvm_pgtable_mm_ops *mm_ops)
+/*
+ * Tell whether the unmap walker may postpone its TLB invalidations.
+ *
+ * When FEAT_TLBIRANGE is implemented, the per-entry invalidations can
+ * be skipped during the walk and replaced by range-based TLBIs once it
+ * has finished. This is only done on systems with FWB, as the
+ * optimization is pointless when the walker has to perform CMOs anyway.
+ */
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+       if (!system_supports_tlb_range())
+               return false;
+
+       return stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+                               struct kvm_s2_mmu *mmu,
+                               struct kvm_pgtable_mm_ops *mm_ops)
 {
+       struct kvm_pgtable *pgt = ctx->arg;
+
        /*
-        * Clear the existing PTE, and perform break-before-make with
-        * TLB maintenance if it was valid.
+        * Clear the existing PTE, and perform break-before-make if it was
+        * valid. Depending on the system support, defer the TLB maintenance
+        * for the same until the entire unmap walk is completed.
         */
        if (kvm_pte_valid(ctx->old)) {
                kvm_clear_pte(ctx->ptep);
-               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+               if (!stage2_unmap_defer_tlb_flush(pgt))
+                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+                                       ctx->addr, ctx->level);
        }
 
        mm_ops->put_page(ctx->ptep);
@@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
         * block entry and rely on the remaining portions being faulted
         * back lazily.
         */
-       stage2_put_pte(ctx, mmu, mm_ops);
+       stage2_unmap_put_pte(ctx, mmu, mm_ops);
 
        if (need_flush && mm_ops->dcache_clean_inval_poc)
                mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
+       int ret;
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_unmap_walker,
                .arg    = pgt,
                .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
        };
 
-       return kvm_pgtable_walk(pgt, addr, size, &walker);
+       ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+       if (stage2_unmap_defer_tlb_flush(pgt))
+               /* Perform the deferred TLB invalidations */
+               kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+       return ret;
 }
 
 struct stage2_attr_data {
index e69da55..46bd43f 100644 (file)
@@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
        __tlb_switch_to_host(&cxt);
 }
 
+/*
+ * Range-based stage-2 TLB invalidation for the VMID of @mmu.
+ *
+ * @start is rounded down to PAGE_SIZE. After a dsb(ishst), the function
+ * switches to the guest context of @mmu, hands @pages pages to
+ * __flush_s2_tlb_range_op() with the ipas2e1is operation and a PAGE_SIZE
+ * stride, issues a vmalle1is invalidation, and switches back to the host.
+ *
+ * NOTE(review): the meaning of the trailing 0 passed to
+ * __flush_s2_tlb_range_op() is not visible from here - confirm against
+ * its definition.
+ */
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+                               phys_addr_t start, unsigned long pages)
+{
+       struct tlb_inv_context cxt;
+       unsigned long stride;
+
+       /*
+        * Since the range of addresses may not be mapped at
+        * the same level, assume the worst case as PAGE_SIZE
+        */
+       stride = PAGE_SIZE;
+       start = round_down(start, stride);
+
+       dsb(ishst);
+
+       /* Switch to requested VMID */
+       __tlb_switch_to_guest(mmu, &cxt);
+
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+       dsb(ish);
+       __tlbi(vmalle1is);
+       dsb(ish);
+       isb();
+
+       __tlb_switch_to_host(&cxt);
+}
+
 void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
index d3b4fee..587a104 100644 (file)
@@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
 }
 
 /**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
  * @kvm:       pointer to kvm structure.
  *
  * Interface to HYP function to flush all VM TLB entries
  */
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
-       ++kvm->stat.generic.remote_tlb_flush_requests;
        kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+       return 0;
+}
+
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+                                     gfn_t gfn, u64 nr_pages)
+{
+       kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
+                               gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+       return 0;
 }
 
 static bool kvm_is_device_pfn(unsigned long pfn)
@@ -592,6 +600,25 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
        return 0;
 }
 
+/*
+ * Commit @base as the new io_map_base. The caller must hold
+ * kvm_hyp_pgd_mutex.
+ *
+ * Returns -EINVAL if @base is not page aligned, -ENOMEM if @base and
+ * the current io_map_base disagree on BIT(VA_BITS - 1), and 0 on success.
+ */
+static int __hyp_alloc_private_va_range(unsigned long base)
+{
+       const unsigned long half_bit = BIT(VA_BITS - 1);
+
+       lockdep_assert_held(&kvm_hyp_pgd_mutex);
+
+       if (!PAGE_ALIGNED(base))
+               return -EINVAL;
+
+       /*
+        * A change of BIT(VA_BITS - 1) between the current base and the
+        * new one indicates that the allocation has overflowed the
+        * idmap/IO address range.
+        */
+       if ((base & half_bit) != (io_map_base & half_bit))
+               return -ENOMEM;
+
+       io_map_base = base;
+
+       return 0;
+}
 
 /**
  * hyp_alloc_private_va_range - Allocates a private VA range.
@@ -612,26 +639,16 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
 
        /*
         * This assumes that we have enough space below the idmap
-        * page to allocate our VAs. If not, the check below will
-        * kick. A potential alternative would be to detect that
-        * overflow and switch to an allocation above the idmap.
+        * page to allocate our VAs. If not, the check in
+        * __hyp_alloc_private_va_range() will kick. A potential
+        * alternative would be to detect that overflow and switch
+        * to an allocation above the idmap.
         *
         * The allocated size is always a multiple of PAGE_SIZE.
         */
-       base = io_map_base - PAGE_ALIGN(size);
-
-       /* Align the allocation based on the order of its size */
-       base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
-
-       /*
-        * Verify that BIT(VA_BITS - 1) hasn't been flipped by
-        * allocating the new area, as it would indicate we've
-        * overflowed the idmap/IO address range.
-        */
-       if ((base ^ io_map_base) & BIT(VA_BITS - 1))
-               ret = -ENOMEM;
-       else
-               *haddr = io_map_base = base;
+       size = PAGE_ALIGN(size);
+       base = io_map_base - size;
+       ret = __hyp_alloc_private_va_range(base);
 
        mutex_unlock(&kvm_hyp_pgd_mutex);
 
@@ -668,6 +685,48 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
        return ret;
 }
 
+/*
+ * Reserve a two-page private VA range aligned on its own size below
+ * io_map_base and map @phys_addr at the upper page, leaving the lower
+ * page unmapped as a guard.
+ *
+ * @phys_addr: physical address backing the stack page.
+ * @haddr:     written with the end of the range (base + 2 * PAGE_SIZE)
+ *             once the reservation has succeeded, including when the
+ *             mapping itself then fails; left untouched if the
+ *             reservation fails.
+ *
+ * Returns 0 on success, or the error from the reservation or from
+ * __create_hyp_mappings().
+ */
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
+{
+       unsigned long base;
+       size_t size;
+       int ret;
+
+       /* The mutex only covers the VA reservation, not the mapping below */
+       mutex_lock(&kvm_hyp_pgd_mutex);
+       /*
+        * Efficient stack verification using the PAGE_SHIFT bit implies
+        * an alignment of our allocation on the order of the size.
+        */
+       size = PAGE_SIZE * 2;
+       base = ALIGN_DOWN(io_map_base - size, size);
+
+       ret = __hyp_alloc_private_va_range(base);
+
+       mutex_unlock(&kvm_hyp_pgd_mutex);
+
+       if (ret) {
+               kvm_err("Cannot allocate hyp stack guard page\n");
+               return ret;
+       }
+
+       /*
+        * Since the stack grows downwards, map the stack to the page
+        * at the higher address and leave the lower guard page
+        * unbacked.
+        *
+        * Any valid stack address now has the PAGE_SHIFT bit as 1
+        * and addresses corresponding to the guard page have the
+        * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+        */
+       ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
+                                   PAGE_HYP);
+       if (ret)
+               kvm_err("Cannot map hyp stack\n");
+
+       *haddr = base + size;
+
+       return ret;
+}
+
 /**
  * create_hyp_io_mappings - Map IO into both kernel and HYP
  * @phys_addr: The physical start address which gets mapped
@@ -1075,7 +1134,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
        write_lock(&kvm->mmu_lock);
        stage2_wp_range(&kvm->arch.mmu, start, end);
        write_unlock(&kvm->mmu_lock);
-       kvm_flush_remote_tlbs(kvm);
+       kvm_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 /**
@@ -1541,7 +1600,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 out_unlock:
        read_unlock(&kvm->mmu_lock);
-       kvm_set_pfn_accessed(pfn);
        kvm_release_pfn_clean(pfn);
        return ret != -EAGAIN ? ret : 0;
 }
@@ -1721,7 +1779,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-       kvm_pfn_t pfn = pte_pfn(range->pte);
+       kvm_pfn_t pfn = pte_pfn(range->arg.pte);
 
        if (!kvm->arch.mmu.pgt)
                return false;
index 315354d..042695a 100644 (file)
@@ -71,8 +71,9 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR0_EL1:
-               /* Hide ECV, FGT, ExS, Secure Memory */
-               val &= ~(GENMASK_ULL(63, 43)            |
+               /* Hide ECV, ExS, Secure Memory */
+               val &= ~(NV_FTR(MMFR0, ECV)             |
+                        NV_FTR(MMFR0, EXS)             |
                         NV_FTR(MMFR0, TGRAN4_2)        |
                         NV_FTR(MMFR0, TGRAN16_2)       |
                         NV_FTR(MMFR0, TGRAN64_2)       |
@@ -116,7 +117,8 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR1_EL1:
-               val &= (NV_FTR(MMFR1, PAN)      |
+               val &= (NV_FTR(MMFR1, HCX)      |
+                       NV_FTR(MMFR1, PAN)      |
                        NV_FTR(MMFR1, LO)       |
                        NV_FTR(MMFR1, HPDS)     |
                        NV_FTR(MMFR1, VH)       |
@@ -124,8 +126,7 @@ void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
                break;
 
        case SYS_ID_AA64MMFR2_EL1:
-               val &= ~(NV_FTR(MMFR2, EVT)     |
-                        NV_FTR(MMFR2, BBM)     |
+               val &= ~(NV_FTR(MMFR2, BBM)     |
                         NV_FTR(MMFR2, TTL)     |
                         GENMASK_ULL(47, 44)    |
                         NV_FTR(MMFR2, ST)      |
index 5606509..6b066e0 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/kvm_emulate.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_vgic.h>
+#include <asm/arm_pmuv3.h>
 
 #define PERF_ATTR_CFG1_COUNTER_64BIT   BIT(0)
 
@@ -35,12 +36,8 @@ static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
        return &vcpu->arch.pmu.pmc[cnt_idx];
 }
 
-static u32 kvm_pmu_event_mask(struct kvm *kvm)
+static u32 __kvm_pmu_event_mask(unsigned int pmuver)
 {
-       unsigned int pmuver;
-
-       pmuver = kvm->arch.arm_pmu->pmuver;
-
        switch (pmuver) {
        case ID_AA64DFR0_EL1_PMUVer_IMP:
                return GENMASK(9, 0);
@@ -55,6 +52,14 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
        }
 }
 
+static u32 kvm_pmu_event_mask(struct kvm *kvm)
+{
+       u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1);
+       u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);
+
+       return __kvm_pmu_event_mask(pmuver);
+}
+
 /**
  * kvm_pmc_is_64bit - determine if counter is 64bit
  * @pmc: counter context
@@ -672,8 +677,11 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
 {
        struct arm_pmu_entry *entry;
 
-       if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
-           pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+       /*
+        * Check the sanitised PMU version for the system, as KVM does not
+        * support implementations where PMUv3 exists on a subset of CPUs.
+        */
+       if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
                return;
 
        mutex_lock(&arm_pmus_lock);
@@ -750,11 +758,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
        } else {
                val = read_sysreg(pmceid1_el0);
                /*
-                * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
+                * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
                 * as RAZ
                 */
-               if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
-                       val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
+               val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
+                        BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
+                        BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
                base = 32;
        }
 
@@ -950,11 +959,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
                return 0;
        }
        case KVM_ARM_VCPU_PMU_V3_FILTER: {
+               u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
                struct kvm_pmu_event_filter __user *uaddr;
                struct kvm_pmu_event_filter filter;
                int nr_events;
 
-               nr_events = kvm_pmu_event_mask(kvm) + 1;
+               /*
+                * Allow userspace to specify an event filter for the entire
+                * event range supported by PMUVer of the hardware, rather
+                * than the guest's PMUVer for KVM backward compatibility.
+                */
+               nr_events = __kvm_pmu_event_mask(pmuver) + 1;
 
                uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
 
index 121f1a1..0eea225 100644 (file)
@@ -236,3 +236,21 @@ bool kvm_set_pmuserenr(u64 val)
        ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
        return true;
 }
+
+/*
+ * If we interrupted the guest to update the host PMU context, make
+ * sure we re-apply the guest EL0 state.
+ */
+void kvm_vcpu_pmu_resync_el0(void)
+{
+       struct kvm_vcpu *vcpu;
+
+       if (!has_vhe() || !in_interrupt())
+               return;
+
+       vcpu = kvm_get_running_vcpu();
+       if (!vcpu)
+               return;
+
+       kvm_make_request(KVM_REQ_RESYNC_PMU_EL0, vcpu);
+}
index bc8556b..7a65a35 100644 (file)
@@ -248,21 +248,16 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
                }
        }
 
-       switch (vcpu->arch.target) {
-       default:
-               if (vcpu_el1_is_32bit(vcpu)) {
-                       pstate = VCPU_RESET_PSTATE_SVC;
-               } else if (vcpu_has_nv(vcpu)) {
-                       pstate = VCPU_RESET_PSTATE_EL2;
-               } else {
-                       pstate = VCPU_RESET_PSTATE_EL1;
-               }
-
-               if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-               break;
+       if (vcpu_el1_is_32bit(vcpu))
+               pstate = VCPU_RESET_PSTATE_SVC;
+       else if (vcpu_has_nv(vcpu))
+               pstate = VCPU_RESET_PSTATE_EL2;
+       else
+               pstate = VCPU_RESET_PSTATE_EL1;
+
+       if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
+               ret = -EINVAL;
+               goto out;
        }
 
        /* Reset core registers */
index 2ca2973..e92ec81 100644 (file)
@@ -2151,6 +2151,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
        { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
 
+       { SYS_DESC(SYS_ACCDATA_EL1), undef_access },
+
        { SYS_DESC(SYS_SCXTNUM_EL1), undef_access },
 
        { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
@@ -2365,8 +2367,13 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
        EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
        EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
        EL2_REG(HACR_EL2, access_rw, reset_val, 0),
 
+       EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
+
        EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
        EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
        EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
@@ -2374,6 +2381,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
 
        { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+       EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
+       EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
        EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
        EL2_REG(ELR_EL2, access_rw, reset_val, 0),
        { SYS_DESC(SYS_SP_EL1), access_sp_el1},
@@ -3170,6 +3179,9 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 
        trace_kvm_handle_sys_reg(esr);
 
+       if (__check_nv_sr_forward(vcpu))
+               return 1;
+
        params = esr_sys64_to_params(esr);
        params.regval = vcpu_get_reg(vcpu, Rt);
 
@@ -3587,5 +3599,8 @@ int __init kvm_sys_reg_table_init(void)
        if (!first_idreg)
                return -EINVAL;
 
+       if (kvm_get_mode() == KVM_MODE_NV)
+               return populate_nv_trap_config();
+
        return 0;
 }
index 6ce5c02..8ad5310 100644 (file)
@@ -364,6 +364,32 @@ TRACE_EVENT(kvm_inject_nested_exception,
                  __entry->hcr_el2)
 );
 
+TRACE_EVENT(kvm_forward_sysreg_trap,
+           TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read),
+           TP_ARGS(vcpu, sysreg, is_read),
+
+           TP_STRUCT__entry(
+               __field(u64,    pc)
+               __field(u32,    sysreg)
+               __field(bool,   is_read)
+           ),
+
+           TP_fast_assign(
+               __entry->pc = *vcpu_pc(vcpu);
+               __entry->sysreg = sysreg;
+               __entry->is_read = is_read;
+           ),
+
+           TP_printk("%llx %c (%d,%d,%d,%d,%d)",
+                     __entry->pc,
+                     __entry->is_read ? 'R' : 'W',
+                     sys_reg_Op0(__entry->sysreg),
+                     sys_reg_Op1(__entry->sysreg),
+                     sys_reg_CRn(__entry->sysreg),
+                     sys_reg_CRm(__entry->sysreg),
+                     sys_reg_Op2(__entry->sysreg))
+);
+
 #endif /* _TRACE_ARM_ARM64_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
index f9923be..0ab09b0 100644 (file)
@@ -199,7 +199,6 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
 int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
                         int offset, u32 *val);
@@ -233,7 +232,6 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 void vgic_v3_enable(struct kvm_vcpu *vcpu);
index 78b87a6..2432683 100644 (file)
@@ -24,7 +24,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len)
        const u64 *ptr;
        u64 data, sum64 = 0;
 
-       if (unlikely(len == 0))
+       if (unlikely(len <= 0))
                return 0;
 
        offset = (unsigned long)buff & 7;
index c80ed4f..c3f06fd 100644 (file)
@@ -26,6 +26,7 @@ HAS_ECV
 HAS_ECV_CNTPOFF
 HAS_EPAN
 HAS_EVT
+HAS_FGT
 HAS_GENERIC_AUTH
 HAS_GENERIC_AUTH_ARCH_QARMA3
 HAS_GENERIC_AUTH_ARCH_QARMA5
index 65866bf..2517ef7 100644 (file)
@@ -2156,6 +2156,135 @@ Field   1       ICIALLU
 Field  0       ICIALLUIS
 EndSysreg
 
+Sysreg HDFGRTR_EL2     3       4       3       1       4
+Field  63      PMBIDR_EL1
+Field  62      nPMSNEVFR_EL1
+Field  61      nBRBDATA
+Field  60      nBRBCTL
+Field  59      nBRBIDR
+Field  58      PMCEIDn_EL0
+Field  57      PMUSERENR_EL0
+Field  56      TRBTRG_EL1
+Field  55      TRBSR_EL1
+Field  54      TRBPTR_EL1
+Field  53      TRBMAR_EL1
+Field  52      TRBLIMITR_EL1
+Field  51      TRBIDR_EL1
+Field  50      TRBBASER_EL1
+Res0   49
+Field  48      TRCVICTLR
+Field  47      TRCSTATR
+Field  46      TRCSSCSRn
+Field  45      TRCSEQSTR
+Field  44      TRCPRGCTLR
+Field  43      TRCOSLSR
+Res0   42
+Field  41      TRCIMSPECn
+Field  40      TRCID
+Res0   39:38
+Field  37      TRCCNTVRn
+Field  36      TRCCLAIM
+Field  35      TRCAUXCTLR
+Field  34      TRCAUTHSTATUS
+Field  33      TRC
+Field  32      PMSLATFR_EL1
+Field  31      PMSIRR_EL1
+Field  30      PMSIDR_EL1
+Field  29      PMSICR_EL1
+Field  28      PMSFCR_EL1
+Field  27      PMSEVFR_EL1
+Field  26      PMSCR_EL1
+Field  25      PMBSR_EL1
+Field  24      PMBPTR_EL1
+Field  23      PMBLIMITR_EL1
+Field  22      PMMIR_EL1
+Res0   21:20
+Field  19      PMSELR_EL0
+Field  18      PMOVS
+Field  17      PMINTEN
+Field  16      PMCNTEN
+Field  15      PMCCNTR_EL0
+Field  14      PMCCFILTR_EL0
+Field  13      PMEVTYPERn_EL0
+Field  12      PMEVCNTRn_EL0
+Field  11      OSDLR_EL1
+Field  10      OSECCR_EL1
+Field  9       OSLSR_EL1
+Res0   8
+Field  7       DBGPRCR_EL1
+Field  6       DBGAUTHSTATUS_EL1
+Field  5       DBGCLAIM
+Field  4       MDSCR_EL1
+Field  3       DBGWVRn_EL1
+Field  2       DBGWCRn_EL1
+Field  1       DBGBVRn_EL1
+Field  0       DBGBCRn_EL1
+EndSysreg
+
+Sysreg HDFGWTR_EL2     3       4       3       1       5
+Res0   63
+Field  62      nPMSNEVFR_EL1
+Field  61      nBRBDATA
+Field  60      nBRBCTL
+Res0   59:58
+Field  57      PMUSERENR_EL0
+Field  56      TRBTRG_EL1
+Field  55      TRBSR_EL1
+Field  54      TRBPTR_EL1
+Field  53      TRBMAR_EL1
+Field  52      TRBLIMITR_EL1
+Res0   51
+Field  50      TRBBASER_EL1
+Field  49      TRFCR_EL1
+Field  48      TRCVICTLR
+Res0   47
+Field  46      TRCSSCSRn
+Field  45      TRCSEQSTR
+Field  44      TRCPRGCTLR
+Res0   43
+Field  42      TRCOSLAR
+Field  41      TRCIMSPECn
+Res0   40:38
+Field  37      TRCCNTVRn
+Field  36      TRCCLAIM
+Field  35      TRCAUXCTLR
+Res0   34
+Field  33      TRC
+Field  32      PMSLATFR_EL1
+Field  31      PMSIRR_EL1
+Res0   30
+Field  29      PMSICR_EL1
+Field  28      PMSFCR_EL1
+Field  27      PMSEVFR_EL1
+Field  26      PMSCR_EL1
+Field  25      PMBSR_EL1
+Field  24      PMBPTR_EL1
+Field  23      PMBLIMITR_EL1
+Res0   22
+Field  21      PMCR_EL0
+Field  20      PMSWINC_EL0
+Field  19      PMSELR_EL0
+Field  18      PMOVS
+Field  17      PMINTEN
+Field  16      PMCNTEN
+Field  15      PMCCNTR_EL0
+Field  14      PMCCFILTR_EL0
+Field  13      PMEVTYPERn_EL0
+Field  12      PMEVCNTRn_EL0
+Field  11      OSDLR_EL1
+Field  10      OSECCR_EL1
+Res0   9
+Field  8       OSLAR_EL1
+Field  7       DBGPRCR_EL1
+Res0   6
+Field  5       DBGCLAIM
+Field  4       MDSCR_EL1
+Field  3       DBGWVRn_EL1
+Field  2       DBGWCRn_EL1
+Field  1       DBGBVRn_EL1
+Field  0       DBGBCRn_EL1
+EndSysreg
+
 Sysreg ZCR_EL2 3       4       1       2       0
 Fields ZCR_ELx
 EndSysreg
index 3373324..aefae2e 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += agp.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += vtime.h
index 5eba3fb..ac06d44 100644 (file)
@@ -37,7 +37,7 @@
  *     pNonSys:        !pSys
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/cache.h>
@@ -49,7 +49,6 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
-#include <asm/export.h>
 
 #include "minstate.h"
 
index 821e68d..9928c5b 100644 (file)
@@ -34,9 +34,9 @@
 #define PSR_BITS_TO_SET                                                        \
        (IA64_PSR_BN)
 
+#include <linux/export.h>
 #include <asm/processor.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 /*
  * Inputs:
index c096500..85c8a57 100644 (file)
@@ -20,7 +20,7 @@
  *   Support for CPU Hotplug
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/fpu.h>
@@ -33,7 +33,6 @@
 #include <asm/mca_asm.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define SAL_PSR_BITS_TO_SET                            \
index 7a418e3..da90c49 100644 (file)
@@ -47,7 +47,7 @@
  * Table is based upon EAS2.6 (Oct 1999)
  */
 
-
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/asmmacro.h>
 #include <asm/break.h>
@@ -58,7 +58,6 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 
 #if 0
 # define PSR_DEFAULT_BITS      psr.ac
index 06d01a0..fb6db69 100644 (file)
@@ -13,9 +13,9 @@
  * 05/24/2000 eranian Added support for physical mode static calls
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/processor.h>
-#include <asm/export.h>
 
        .data
 pal_entry_point:
index 65b7508..ba0dd25 100644 (file)
@@ -10,9 +10,9 @@
  * 3/08/02 davidm      Some more tweaking
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #ifdef CONFIG_ITANIUM
 # define L3_LINE_SIZE  64      // Itanium L3 line size
index a28f39d..1d9e45c 100644 (file)
@@ -12,8 +12,8 @@
  *     Stephane Eranian <eranian@hpl.hp.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 // arguments
index 176f857..c0a0e6b 100644 (file)
@@ -15,9 +15,9 @@
  *
  * 4/06/01 davidm      Tuned to make it perform well both for cached and uncached copies.
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define PIPE_DEPTH     3
 #define EPI            p[PIPE_DEPTH-1]
index d6fd56e..5e8bb4b 100644 (file)
@@ -60,9 +60,9 @@
  *     to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
  *     an order that avoids bank conflicts.
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define PREFETCH_DIST  8               // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
 
index f681556..8daab72 100644 (file)
@@ -30,8 +30,8 @@
  *     - fix extraneous stop bit introduced by the EX() macro.
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 // Tuneable parameters
index 8573d59..f8e795f 100644 (file)
@@ -8,9 +8,8 @@
  * 05/28/05 Zoltan Menyhart    Dynamic stride size
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
-
 
        /*
         * flush_icache_range(start,end)
index def92b7..83586fb 100644 (file)
@@ -15,8 +15,8 @@
  * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #ifdef MODULO
 # define OP    mod
index a8ba3bd..5c91136 100644 (file)
@@ -15,8 +15,8 @@
  * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #ifdef MODULO
 # define OP    mod
index dc9e6e6..fcc0b81 100644 (file)
@@ -13,8 +13,8 @@
  * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 /*
  * Since we know that most likely this function is called with buf aligned
index 91a625f..35c9069 100644 (file)
@@ -14,8 +14,8 @@
  *     Stephane Eranian <eranian@hpl.hp.com>
  *     David Mosberger-Tang <davidm@hpl.hp.com>
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(memcpy)
 
index cc4e6ac..c0d4362 100644 (file)
@@ -14,9 +14,9 @@
  * Copyright (C) 2002 Intel Corp.
  * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
  */
+#include <linux/export.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
 #define EK(y...) EX(y)
 
index 07a8b92..552c5c7 100644 (file)
@@ -18,8 +18,8 @@
    Since a stf.spill f0 can store 16B in one go, we use this instruction
    to get peak speed when value = 0.  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 #undef ret
 
 #define dest           in0
index d66de59..1f4a46c 100644 (file)
@@ -17,8 +17,8 @@
  * 09/24/99 S.Eranian add speculation recovery code
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 //
 //
index 49eb81b..a287169 100644 (file)
@@ -17,8 +17,8 @@
  *                      by Andreas Schwab <schwab@suse.de>).
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(__strncpy_from_user)
        alloc r2=ar.pfs,3,0,0,0
index 4b684d4..a7eb56e 100644 (file)
@@ -13,8 +13,8 @@
  * Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(__strnlen_user)
        .prologue
index 5413daf..6e2a696 100644 (file)
@@ -5,8 +5,8 @@
  * Optimized RAID-5 checksumming functions for IA-64.
  */
 
+#include <linux/export.h>
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 GLOBAL_ENTRY(xor_ia64_2)
        .prologue
index ecf282d..e14396a 100644 (file)
@@ -8,11 +8,13 @@ config LOONGARCH
        select ACPI_PPTT if ACPI
        select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
        select ARCH_BINFMT_ELF_STATE
+       select ARCH_DISABLE_KASAN_INLINE
        select ARCH_ENABLE_MEMORY_HOTPLUG
        select ARCH_ENABLE_MEMORY_HOTREMOVE
        select ARCH_HAS_ACPI_TABLE_UPGRADE      if ACPI
        select ARCH_HAS_CPU_FINALIZE_INIT
        select ARCH_HAS_FORTIFY_SOURCE
+       select ARCH_HAS_KCOV
        select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
        select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        select ARCH_HAS_PTE_SPECIAL
@@ -91,6 +93,9 @@ config LOONGARCH
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_JUMP_LABEL_RELATIVE
+       select HAVE_ARCH_KASAN
+       select HAVE_ARCH_KFENCE
+       select HAVE_ARCH_KGDB if PERF_EVENTS
        select HAVE_ARCH_MMAP_RND_BITS if MMU
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
@@ -115,6 +120,7 @@ config LOONGARCH
        select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
+       select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO
        select HAVE_HW_BREAKPOINT if PERF_EVENTS
        select HAVE_IOREMAP_PROT
@@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION
 config AS_HAS_LASX_EXTENSION
        def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
 
+config AS_HAS_LBT_EXTENSION
+       def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
+
 menu "Kernel type and options"
 
 source "kernel/Kconfig.hz"
@@ -534,6 +543,18 @@ config CPU_HAS_LASX
 
          If unsure, say Y.
 
+config CPU_HAS_LBT
+       bool "Support for the Loongson Binary Translation Extension"
+       depends on AS_HAS_LBT_EXTENSION
+       help
+         Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0
+         to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop).
+         Enabling this option allows the kernel to allocate and switch registers
+         specific to LBT.
+
+         If you want to use this feature, such as the Loongson Architecture
+         Translator (LAT), say Y.
+
 config CPU_HAS_PREFETCH
        bool
        default y
@@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX
 config ARCH_SUPPORTS_UPROBES
        def_bool y
 
+config KASAN_SHADOW_OFFSET
+       hex
+       default 0x0
+       depends on KASAN
+
 menu "Power management options"
 
 config ARCH_SUSPEND_POSSIBLE
index ef87bab..fb0fada 100644 (file)
@@ -84,7 +84,10 @@ LDFLAGS_vmlinux                      += -static -pie --no-dynamic-linker -z notext
 endif
 
 cflags-y += $(call cc-option, -mno-check-zero-division)
+
+ifndef CONFIG_KASAN
 cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
+endif
 
 load-y         = 0x9000000000200000
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
index d64849b..a3b52aa 100644 (file)
@@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
-CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
@@ -47,8 +46,12 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
+CONFIG_CPU_HAS_FPU=y
+CONFIG_CPU_HAS_LSX=y
+CONFIG_CPU_HAS_LASX=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+CONFIG_RANDOMIZE_BASE=y
 CONFIG_SUSPEND=y
 CONFIG_HIBERNATION=y
 CONFIG_ACPI=y
@@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y
 CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
 CONFIG_EFI_CAPSULE_LOADER=m
 CONFIG_EFI_TEST=m
+CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
@@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
 CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
 CONFIG_INET_ESP=m
 CONFIG_INET_UDP_DIAG=y
 CONFIG_TCP_CONG_ADVANCED=y
@@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m
 CONFIG_NFT_REDIR=m
 CONFIG_NFT_NAT=m
 CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_QUOTA=m
 CONFIG_NFT_REJECT=m
@@ -208,7 +216,11 @@ CONFIG_IP_VS=m
 CONFIG_IP_VS_IPV6=y
 CONFIG_IP_VS_PROTO_TCP=y
 CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
 CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
 CONFIG_IP_VS_NFCT=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_DUP_IPV4=m
@@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
 CONFIG_IP_NF_TARGET_REDIRECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
@@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m
 CONFIG_MTD_CFI_STAA=m
 CONFIG_MTD_RAM=m
 CONFIG_MTD_ROM=m
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_BLOCK=y
 CONFIG_PARPORT=y
 CONFIG_PARPORT_PC=y
 CONFIG_PARPORT_SERIAL=y
@@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_ZRAM=m
 CONFIG_ZRAM_DEF_COMP_ZSTD=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -516,6 +530,8 @@ CONFIG_STMMAC_ETH=y
 # CONFIG_NET_VENDOR_TEHUTI is not set
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VIA is not set
+CONFIG_NGBE=y
+CONFIG_TXGBE=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_PIIX4=y
 CONFIG_I2C_GPIO=y
+CONFIG_I2C_LS2X=y
 CONFIG_SPI=y
+CONFIG_SPI_LOONGSON_PCI=m
+CONFIG_SPI_LOONGSON_PLATFORM=m
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_LOONGSON2=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_LOONGSON=y
+CONFIG_GPIO_LOONGSON_64BIT=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_RESTART=y
 CONFIG_POWER_RESET_SYSCON=y
@@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m
 CONFIG_SENSORS_LM93=m
 CONFIG_SENSORS_W83795=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_LOONGSON2_THERMAL=m
 CONFIG_RC_CORE=m
 CONFIG_LIRC=y
 CONFIG_RC_DECODERS=y
@@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
 CONFIG_DRM_AST=y
 CONFIG_DRM_QXL=m
 CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_LOONGSON=y
 CONFIG_FB=y
 CONFIG_FB_EFI=y
 CONFIG_FB_RADEON=y
@@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m
 CONFIG_INFINIBAND=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_LOONGSON=y
 CONFIG_DMADEVICES=y
 CONFIG_UIO=m
 CONFIG_UIO_PDRV_GENIRQ=m
@@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m
 CONFIG_COMEDI_NI_PCIDIO=m
 CONFIG_COMEDI_NI_PCIMIO=m
 CONFIG_STAGING=y
-CONFIG_R8188EU=m
+CONFIG_COMMON_CLK_LOONGSON2=y
+CONFIG_LOONGSON2_GUTS=y
+CONFIG_LOONGSON2_PM=y
 CONFIG_PM_DEVFREQ=y
 CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
 CONFIG_XFS_FS=y
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=y
 CONFIG_OVERLAY_FS_INDEX=y
 CONFIG_OVERLAY_FS_XINO_AUTO=y
 CONFIG_OVERLAY_FS_METACOPY=y
 CONFIG_FSCACHE=y
+CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
@@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_CODEPAGE=936
 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312"
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=y
+CONFIG_ORANGEFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_ECRYPT_FS_MESSAGING=y
 CONFIG_HFS_FS=m
 CONFIG_HFSPLUS_FS=m
+CONFIG_UBIFS_FS=m
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=y
 CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZ4=y
 CONFIG_SQUASHFS_LZO=y
 CONFIG_SQUASHFS_XZ=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_PSTORE=m
+CONFIG_PSTORE_LZO_COMPRESS=m
+CONFIG_PSTORE_LZ4_COMPRESS=m
+CONFIG_PSTORE_LZ4HC_COMPRESS=m
+CONFIG_PSTORE_842_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_ZIP_LZMA=y
+CONFIG_EROFS_FS_PCPU_KTHREAD=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
@@ -807,6 +867,10 @@ CONFIG_NFSD=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_CEPH_FS=m
+CONFIG_CEPH_FSCACHE=y
+CONFIG_CEPH_FS_POSIX_ACL=y
+CONFIG_CEPH_FS_SECURITY_LABEL=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_DEBUG is not set
 CONFIG_9P_FS=y
@@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_936=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_UTF8=y
+CONFIG_DLM=m
 CONFIG_KEY_DH_OPERATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_SELINUX=y
@@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_CRC32_LOONGARCH=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
index ed06d39..cf8e1a4 100644 (file)
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/uaccess.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/ftrace.h>
index 79e1d53..c9544f3 100644 (file)
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 
-       .macro  parse_v var val
-       \var    = \val
-       .endm
-
-       .macro  parse_r var r
-       \var    = -1
-       .ifc    \r, $r0
-       \var    = 0
-       .endif
-       .ifc    \r, $r1
-       \var    = 1
-       .endif
-       .ifc    \r, $r2
-       \var    = 2
-       .endif
-       .ifc    \r, $r3
-       \var    = 3
-       .endif
-       .ifc    \r, $r4
-       \var    = 4
-       .endif
-       .ifc    \r, $r5
-       \var    = 5
-       .endif
-       .ifc    \r, $r6
-       \var    = 6
-       .endif
-       .ifc    \r, $r7
-       \var    = 7
-       .endif
-       .ifc    \r, $r8
-       \var    = 8
-       .endif
-       .ifc    \r, $r9
-       \var    = 9
-       .endif
-       .ifc    \r, $r10
-       \var    = 10
-       .endif
-       .ifc    \r, $r11
-       \var    = 11
-       .endif
-       .ifc    \r, $r12
-       \var    = 12
-       .endif
-       .ifc    \r, $r13
-       \var    = 13
-       .endif
-       .ifc    \r, $r14
-       \var    = 14
-       .endif
-       .ifc    \r, $r15
-       \var    = 15
-       .endif
-       .ifc    \r, $r16
-       \var    = 16
-       .endif
-       .ifc    \r, $r17
-       \var    = 17
-       .endif
-       .ifc    \r, $r18
-       \var    = 18
-       .endif
-       .ifc    \r, $r19
-       \var    = 19
-       .endif
-       .ifc    \r, $r20
-       \var    = 20
-       .endif
-       .ifc    \r, $r21
-       \var    = 21
-       .endif
-       .ifc    \r, $r22
-       \var    = 22
-       .endif
-       .ifc    \r, $r23
-       \var    = 23
-       .endif
-       .ifc    \r, $r24
-       \var    = 24
-       .endif
-       .ifc    \r, $r25
-       \var    = 25
-       .endif
-       .ifc    \r, $r26
-       \var    = 26
-       .endif
-       .ifc    \r, $r27
-       \var    = 27
-       .endif
-       .ifc    \r, $r28
-       \var    = 28
-       .endif
-       .ifc    \r, $r29
-       \var    = 29
-       .endif
-       .ifc    \r, $r30
-       \var    = 30
-       .endif
-       .ifc    \r, $r31
-       \var    = 31
-       .endif
-       .iflt   \var
-       .error  "Unable to parse register name \r"
-       .endif
-       .endm
-
        .macro  cpu_save_nonscratch thread
        stptr.d s0, \thread, THREAD_REG23
        stptr.d s1, \thread, THREAD_REG24
 
        .macro fpu_save_csr thread tmp
        movfcsr2gr      \tmp, fcsr0
-       stptr.w \tmp, \thread, THREAD_FCSR
+       stptr.w         \tmp, \thread, THREAD_FCSR
+#ifdef CONFIG_CPU_HAS_LBT
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp, \tmp, FPU_CSR_TM
+       beqz            \tmp, 1f
+       /* Save FTOP */
+       x86mftop        \tmp
+       stptr.w         \tmp, \thread, THREAD_FTOP
+       /* Turn off TM so that the order of FPRs in memory is independent of TM */
+       x86clrtm
+1:
+#endif
        .endm
 
-       .macro fpu_restore_csr thread tmp
-       ldptr.w \tmp, \thread, THREAD_FCSR
-       movgr2fcsr      fcsr0, \tmp
+       .macro fpu_restore_csr thread tmp0 tmp1
+       ldptr.w         \tmp0, \thread, THREAD_FCSR
+       movgr2fcsr      fcsr0, \tmp0
+#ifdef CONFIG_CPU_HAS_LBT
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp0, \tmp0, FPU_CSR_TM
+       beqz            \tmp0, 2f
+       /* Restore FTOP */
+       ldptr.w         \tmp0, \thread, THREAD_FTOP
+       andi            \tmp0, \tmp0, 0x7
+       la.pcrel        \tmp1, 1f
+       alsl.d          \tmp1, \tmp0, \tmp1, 3
+       jr              \tmp1
+1:
+       x86mttop        0
+       b       2f
+       x86mttop        1
+       b       2f
+       x86mttop        2
+       b       2f
+       x86mttop        3
+       b       2f
+       x86mttop        4
+       b       2f
+       x86mttop        5
+       b       2f
+       x86mttop        6
+       b       2f
+       x86mttop        7
+2:
+#endif
        .endm
 
        .macro fpu_save_cc thread tmp0 tmp1
        .macro  lsx_restore_all thread tmp0 tmp1
        lsx_restore_data        \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
-       fpu_restore_csr         \thread, \tmp0
+       fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm
 
        .macro  lsx_save_upper vd base tmp off
        .macro  lasx_restore_all thread tmp0 tmp1
        lasx_restore_data       \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
-       fpu_restore_csr         \thread, \tmp0
+       fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm
 
        .macro  lasx_save_upper xd base tmp off
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
new file mode 100644 (file)
index 0000000..deeff81
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/mmzone.h>
+#include <asm/addrspace.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+#define __HAVE_ARCH_SHADOW_MAP
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+#define XRANGE_SHIFT (48)
+
+/* Valid address length */
+#define XRANGE_SHADOW_SHIFT    (PGDIR_SHIFT + PAGE_SHIFT - 3)
+/* Used for taking out the valid address */
+#define XRANGE_SHADOW_MASK     GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
+/* One segment whole address space size */
+#define XRANGE_SIZE            (XRANGE_SHADOW_MASK + 1)
+
+/* 64-bit segment value. */
+#define XKPRANGE_UC_SEG                (0x8000)
+#define XKPRANGE_CC_SEG                (0x9000)
+#define XKVRANGE_VC_SEG                (0xffff)
+
+/* Cached */
+#define XKPRANGE_CC_START              CACHE_BASE
+#define XKPRANGE_CC_SIZE               XRANGE_SIZE
+#define XKPRANGE_CC_KASAN_OFFSET       (0)
+#define XKPRANGE_CC_SHADOW_SIZE                (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_CC_SHADOW_END         (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE)
+
+/* UnCached */
+#define XKPRANGE_UC_START              UNCACHE_BASE
+#define XKPRANGE_UC_SIZE               XRANGE_SIZE
+#define XKPRANGE_UC_KASAN_OFFSET       XKPRANGE_CC_SHADOW_END
+#define XKPRANGE_UC_SHADOW_SIZE                (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_UC_SHADOW_END         (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
+
+/* VMALLOC (Cached or UnCached)  */
+#define XKVRANGE_VC_START              MODULES_VADDR
+#define XKVRANGE_VC_SIZE               round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
+#define XKVRANGE_VC_KASAN_OFFSET       XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_SHADOW_SIZE                (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKVRANGE_VC_SHADOW_END         (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
+
+/* KASAN shadow memory starts at the first PGDIR_SIZE boundary after KFENCE_AREA_END. */
+#define KASAN_SHADOW_START             round_up(KFENCE_AREA_END, PGDIR_SIZE)
+#define KASAN_SHADOW_SIZE              (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
+#define KASAN_SHADOW_END               round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+
+#define XKPRANGE_CC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
+#define XKPRANGE_UC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKVRANGE_VC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
+
+extern bool kasan_early_stage;
+extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
+
+/*
+ * Returns true when kasan_early_stage is false. kasan_mem_to_shadow() uses
+ * this to decide between kasan_early_shadow_page and the real shadow layout.
+ * The self-referential #define presumably lets generic code detect that the
+ * architecture supplies its own implementation - TODO confirm.
+ */
+#define kasan_arch_is_ready kasan_arch_is_ready
+static __always_inline bool kasan_arch_is_ready(void)
+{
+       return !kasan_early_stage;
+}
+
+/*
+ * Translate a kernel address into the address of its KASAN shadow byte.
+ *
+ * While kasan_arch_is_ready() is false, every address maps to
+ * kasan_early_shadow_page. Afterwards the top 16 address bits select the
+ * segment (cached, uncached or vmalloc range), each with its own shadow
+ * base. The address bits below XRANGE_SHADOW_SHIFT are scaled down by
+ * KASAN_SHADOW_SCALE_SHIFT and added to that base. An address in any other
+ * segment triggers a warning and yields NULL.
+ */
+static inline void *kasan_mem_to_shadow(const void *addr)
+{
+       unsigned long maddr = (unsigned long)addr;
+       unsigned long shadow_base = 0;
+
+       if (!kasan_arch_is_ready())
+               return (void *)(kasan_early_shadow_page);
+
+       /* Pick the shadow base from the segment number in bits 63..48. */
+       switch ((maddr >> XRANGE_SHIFT) & 0xffff) {
+       case XKPRANGE_CC_SEG:
+               shadow_base = XKPRANGE_CC_SHADOW_OFFSET;
+               break;
+       case XKPRANGE_UC_SEG:
+               shadow_base = XKPRANGE_UC_SHADOW_OFFSET;
+               break;
+       case XKVRANGE_VC_SEG:
+               shadow_base = XKVRANGE_VC_SHADOW_OFFSET;
+               break;
+       default:
+               WARN_ON(1);
+               return NULL;
+       }
+
+       /* Keep only the in-segment part of the address before scaling. */
+       maddr &= XRANGE_SHADOW_MASK;
+
+       return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + shadow_base);
+}
+
+/*
+ * Translate a KASAN shadow address back to the memory address it describes.
+ *
+ * Shadow addresses outside [KASAN_SHADOW_START, KASAN_SHADOW_END] trigger a
+ * warning and yield NULL. Otherwise the shadow region containing the address
+ * is found, the offset into that region is scaled up by
+ * KASAN_SHADOW_SCALE_SHIFT and added to the start of the matching segment.
+ *
+ * NOTE(review): the upper bound test uses '>' so an address equal to
+ * KASAN_SHADOW_END is accepted - confirm that this is intended.
+ */
+static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+       unsigned long addr = (unsigned long)shadow_addr;
+       unsigned long shadow_base, mem_base;
+
+       if (unlikely(addr > KASAN_SHADOW_END) ||
+               unlikely(addr < KASAN_SHADOW_START)) {
+               WARN_ON(1);
+               return NULL;
+       }
+
+       /* The shadow regions are laid out CC, UC, VC in ascending order. */
+       if (addr >= XKVRANGE_VC_SHADOW_OFFSET) {
+               shadow_base = XKVRANGE_VC_SHADOW_OFFSET;
+               mem_base = XKVRANGE_VC_START;
+       } else if (addr >= XKPRANGE_UC_SHADOW_OFFSET) {
+               shadow_base = XKPRANGE_UC_SHADOW_OFFSET;
+               mem_base = XKPRANGE_UC_START;
+       } else if (addr >= XKPRANGE_CC_SHADOW_OFFSET) {
+               shadow_base = XKPRANGE_CC_SHADOW_OFFSET;
+               mem_base = XKPRANGE_CC_START;
+       } else {
+               /* Below every region: kept as a safety net. */
+               WARN_ON(1);
+               return NULL;
+       }
+
+       return (void *)(((addr - shadow_base) << KASAN_SHADOW_SCALE_SHIFT) + mem_base);
+}
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#endif
+#endif
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
new file mode 100644 (file)
index 0000000..6c82aea
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KFENCE support for LoongArch.
+ *
+ * Author: Enze Li <lienze@kylinos.cn>
+ * Copyright (C) 2022-2023 KylinSoft Corporation.
+ */
+
+#ifndef _ASM_LOONGARCH_KFENCE_H
+#define _ASM_LOONGARCH_KFENCE_H
+
+#include <linux/kfence.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+/*
+ * Move the KFENCE pool into the dedicated KFENCE area.
+ *
+ * Reserves a VM area of KFENCE_POOL_SIZE bytes between KFENCE_AREA_START and
+ * KFENCE_AREA_END, points __kfence_pool at it and maps it onto the physical
+ * memory of the original pool. Returns true on success. Returns false if the
+ * area cannot be reserved, or if the mapping fails, in which case the area is
+ * freed and __kfence_pool is restored to its original value.
+ */
+static inline bool arch_kfence_init_pool(void)
+{
+       char *orig_pool = __kfence_pool;
+       unsigned long vstart;
+       struct vm_struct *vma;
+
+       vma = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
+                                  KFENCE_AREA_START, KFENCE_AREA_END,
+                                  __builtin_return_address(0));
+       if (!vma)
+               return false;
+
+       __kfence_pool = (char *)vma->addr;
+       vstart = (unsigned long)__kfence_pool;
+
+       if (ioremap_page_range(vstart, vstart + KFENCE_POOL_SIZE,
+                              virt_to_phys((void *)orig_pool), PAGE_KERNEL)) {
+               /* Undo the reservation and fall back to the original pool. */
+               free_vm_area(vma);
+               __kfence_pool = orig_pool;
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * Protect or unprotect the page at @addr and flush its local TLB entry.
+ *
+ * When @protect is true, _PAGE_VALID and _PAGE_PRESENT are cleared in the
+ * kernel PTE; otherwise both bits are set. Returns false, without touching
+ * anything, if no PTE is found (after a warning) or the PTE is none.
+ * Returns true otherwise.
+ */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+       const unsigned long access_bits = _PAGE_VALID | _PAGE_PRESENT;
+       pte_t *ptep = virt_to_kpte(addr);
+       unsigned long val;
+
+       if (WARN_ON(!ptep) || pte_none(*ptep))
+               return false;
+
+       val = pte_val(*ptep);
+       val = protect ? (val & ~access_bits) : (val | access_bits);
+       set_pte(ptep, __pte(val));
+
+       /* The flush runs with preemption disabled. */
+       preempt_disable();
+       local_flush_tlb_one(addr);
+       preempt_enable();
+
+       return true;
+}
+
+#endif /* _ASM_LOONGARCH_KFENCE_H */
diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h
new file mode 100644 (file)
index 0000000..2041ae5
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_KGDB_H
+#define _ASM_LOONGARCH_KGDB_H
+
+#define GDB_SIZEOF_REG         sizeof(u64)
+
+/* gdb remote protocol expects the following register layout. */
+
+/*
+ * General purpose registers:
+ *     r0-r31: 64 bit
+ *     orig_a0: 64 bit
+ *     pc : 64 bit
+ *     csr_badvaddr: 64 bit
+ */
+#define DBG_PT_REGS_BASE       0
+#define DBG_PT_REGS_NUM                35
+#define DBG_PT_REGS_END                (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1)
+
+/*
+ * Floating point registers:
+ *     f0-f31: 64 bit
+ */
+#define DBG_FPR_BASE           (DBG_PT_REGS_END + 1)
+#define DBG_FPR_NUM            32
+#define DBG_FPR_END            (DBG_FPR_BASE + DBG_FPR_NUM - 1)
+
+/*
+ * Condition Flag registers:
+ *     fcc0-fcc7: 8 bit
+ */
+#define DBG_FCC_BASE           (DBG_FPR_END + 1)
+#define DBG_FCC_NUM            8
+#define DBG_FCC_END            (DBG_FCC_BASE + DBG_FCC_NUM - 1)
+
+/*
+ * Floating-point Control and Status registers:
+ *     fcsr: 32 bit
+ */
+#define DBG_FCSR_NUM           1
+#define DBG_FCSR               (DBG_FCC_END + 1)
+
+#define DBG_MAX_REG_NUM                (DBG_FCSR + 1)
+
+/*
+ * Size of the I/O buffer for gdb packets.
+ * The size is chosen so that it can hold the contents of all registers.
+ */
+#define BUFMAX                 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * PT_REGS and FPR: 8 bytes; FCSR: 4 bytes; FCC: 1 byte.
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+#define NUMREGBYTES            ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4)
+
+#define BREAK_INSTR_SIZE       4
+#define CACHE_FLUSH_IS_SAFE    0
+
+/*
+ * Register numbers of various important registers.
+ *
+ * Only the listed registers get a name; the enumerators between
+ * DBG_LOONGARCH_A0 (4) and DBG_LOONGARCH_FP (22) are intentionally left
+ * unnamed. The last value, DBG_LOONGARCH_BADV (34), equals DBG_PT_REGS_END.
+ */
+enum dbg_loongarch_regnum {
+       DBG_LOONGARCH_ZERO = 0,
+       DBG_LOONGARCH_RA,               /* 1 */
+       DBG_LOONGARCH_TP,               /* 2 */
+       DBG_LOONGARCH_SP,               /* 3 */
+       DBG_LOONGARCH_A0,               /* 4 */
+       DBG_LOONGARCH_FP = 22,
+       DBG_LOONGARCH_S0,               /* 23 */
+       DBG_LOONGARCH_S1,               /* 24 */
+       DBG_LOONGARCH_S2,               /* 25 */
+       DBG_LOONGARCH_S3,               /* 26 */
+       DBG_LOONGARCH_S4,               /* 27 */
+       DBG_LOONGARCH_S5,               /* 28 */
+       DBG_LOONGARCH_S6,               /* 29 */
+       DBG_LOONGARCH_S7,               /* 30 */
+       DBG_LOONGARCH_S8,               /* 31 */
+       DBG_LOONGARCH_ORIG_A0,          /* 32 */
+       DBG_LOONGARCH_PC,               /* 33 */
+       DBG_LOONGARCH_BADV              /* 34 */
+};
+
+void kgdb_breakinst(void);
+void arch_kgdb_breakpoint(void);
+
+#ifdef CONFIG_KGDB
+bool kgdb_breakpoint_handler(struct pt_regs *regs);
+#else /* !CONFIG_KGDB */
+static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; }
+#endif /* CONFIG_KGDB */
+
+#endif /* _ASM_LOONGARCH_KGDB_H */
diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h
new file mode 100644 (file)
index 0000000..e671978
--- /dev/null
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_LBT_H
+#define _ASM_LBT_H
+
+#include <asm/cpu.h>
+#include <asm/current.h>
+#include <asm/loongarch.h>
+#include <asm/processor.h>
+
+extern void _init_lbt(void);
+extern void _save_lbt(struct loongarch_lbt *);
+extern void _restore_lbt(struct loongarch_lbt *);
+
+/*
+ * Report whether LBT is currently enabled: returns 1 when the CPU has LBT
+ * and the LBTEN bit is set in the EUEN CSR, 0 otherwise.
+ */
+static inline int is_lbt_enabled(void)
+{
+       if (cpu_has_lbt)
+               return !!(csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN);
+
+       return 0;
+}
+
+/* Return non-zero if the current thread has the TIF_USEDLBT flag set. */
+static inline int is_lbt_owner(void)
+{
+       return test_thread_flag(TIF_USEDLBT);
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+
+/*
+ * Set the LBTEN bit in the EUEN CSR (csr_xchg32() with LBTEN as both value
+ * and mask). Does nothing if the CPU has no LBT.
+ */
+static inline void enable_lbt(void)
+{
+       if (!cpu_has_lbt)
+               return;
+
+       csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+/*
+ * Clear the LBTEN bit in the EUEN CSR (csr_xchg32() with value 0 and LBTEN
+ * as mask). Does nothing if the CPU has no LBT.
+ */
+static inline void disable_lbt(void)
+{
+       if (!cpu_has_lbt)
+               return;
+
+       csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+/*
+ * Make the current task the LBT owner: enable LBT, set TIF_USEDLBT on the
+ * current thread and set LBTEN in KSTK_EUEN(current) (presumably the EUEN
+ * value kept in the task's saved register state - TODO confirm).
+ */
+static inline void __own_lbt(void)
+{
+       enable_lbt();
+       set_thread_flag(TIF_USEDLBT);
+       KSTK_EUEN(current) |= CSR_EUEN_LBTEN;
+}
+
+/*
+ * Take LBT ownership for the current task unless the CPU lacks LBT or the
+ * task already owns it. When @restore is non-zero the saved state in
+ * current->thread.lbt is reloaded via _restore_lbt() afterwards.
+ * This variant does not touch the preemption state itself.
+ */
+static inline void own_lbt_inatomic(int restore)
+{
+       if (!cpu_has_lbt || is_lbt_owner())
+               return;
+
+       __own_lbt();
+       if (restore)
+               _restore_lbt(&current->thread.lbt);
+}
+
+/*
+ * Preemption-safe wrapper: runs own_lbt_inatomic(@restore) with preemption
+ * disabled.
+ */
+static inline void own_lbt(int restore)
+{
+       preempt_disable();
+       own_lbt_inatomic(restore);
+       preempt_enable();
+}
+
+/*
+ * Give up LBT ownership for @tsk.
+ *
+ * If the CPU has LBT and the current thread has TIF_USEDLBT set, the LBT
+ * state is saved into tsk->thread.lbt (only when @save is non-zero), LBT is
+ * disabled and TIF_USEDLBT is cleared on @tsk. LBTEN is cleared in
+ * KSTK_EUEN(tsk) unconditionally.
+ *
+ * NOTE(review): ownership is tested on the current thread (is_lbt_owner())
+ * while the flag is cleared on @tsk - presumably callers always pass the
+ * current task; confirm.
+ */
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk)
+{
+       if (cpu_has_lbt && is_lbt_owner()) {
+               /* Save before disabling so the state is still accessible. */
+               if (save)
+                       _save_lbt(&tsk->thread.lbt);
+
+               disable_lbt();
+               clear_tsk_thread_flag(tsk, TIF_USEDLBT);
+       }
+       KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN);
+}
+
+/*
+ * Preemption-safe wrapper: runs lose_lbt_inatomic(@save, current) with
+ * preemption disabled.
+ */
+static inline void lose_lbt(int save)
+{
+       preempt_disable();
+       lose_lbt_inatomic(save, current);
+       preempt_enable();
+}
+
+/*
+ * Take LBT ownership for the current task, then call _init_lbt() (declared
+ * above, implemented elsewhere; presumably it resets the LBT registers -
+ * TODO confirm).
+ */
+static inline void init_lbt(void)
+{
+       __own_lbt();
+       _init_lbt();
+}
+#else
+/* No-op stubs used when CONFIG_CPU_HAS_LBT is not set. */
+static inline void own_lbt_inatomic(int restore) {}
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {}
+static inline void init_lbt(void) {}
+static inline void lose_lbt(int save) {}
+#endif
+
+/*
+ * Return non-zero if the CPU has LBT and the current thread has
+ * TIF_LBT_CTX_LIVE set; return 0 otherwise.
+ */
+static inline int thread_lbt_context_live(void)
+{
+       return cpu_has_lbt ? test_thread_flag(TIF_LBT_CTX_LIVE) : 0;
+}
+
+#endif /* _ASM_LBT_H */
index 10748a2..33531d4 100644 (file)
 #ifndef __ASSEMBLY__
 #include <larchintrin.h>
 
-/*
- * parse_r var, r - Helper assembler macro for parsing register names.
- *
- * This converts the register name in $n form provided in \r to the
- * corresponding register number, which is assigned to the variable \var. It is
- * needed to allow explicit encoding of instructions in inline assembly where
- * registers are chosen by the compiler in $n form, allowing us to avoid using
- * fixed register numbers.
- *
- * It also allows newer instructions (not implemented by the assembler) to be
- * transparently implemented using assembler macros, instead of needing separate
- * cases depending on toolchain support.
- *
- * Simple usage example:
- * __asm__ __volatile__("parse_r addr, %0\n\t"
- *                     "#invtlb op, 0, %0\n\t"
- *                     ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)"
- *                     : "=r" (status);
- */
-
-/* Match an individual register number and assign to \var */
-#define _IFC_REG(n)                            \
-       ".ifc   \\r, $r" #n "\n\t"              \
-       "\\var  = " #n "\n\t"                   \
-       ".endif\n\t"
-
-__asm__(".macro        parse_r var r\n\t"
-       "\\var  = -1\n\t"
-       _IFC_REG(0)  _IFC_REG(1)  _IFC_REG(2)  _IFC_REG(3)
-       _IFC_REG(4)  _IFC_REG(5)  _IFC_REG(6)  _IFC_REG(7)
-       _IFC_REG(8)  _IFC_REG(9)  _IFC_REG(10) _IFC_REG(11)
-       _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
-       _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
-       _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
-       _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
-       _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
-       ".iflt  \\var\n\t"
-       ".error \"Unable to parse register name \\r\"\n\t"
-       ".endif\n\t"
-       ".endm");
-
-#undef _IFC_REG
-
 /* CPUCFG */
 #define read_cpucfg(reg) __cpucfg(reg)
 
@@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx)
 #define FPU_CSR_RU     0x200   /* towards +Infinity */
 #define FPU_CSR_RD     0x300   /* towards -Infinity */
 
+/* Bit 6 of the FPU Status Register specifies the LBT TOP simulation mode */
+#define FPU_CSR_TM_SHIFT       0x6
+#define FPU_CSR_TM             (_ULCAST_(1) << FPU_CSR_TM_SHIFT)
+
 #define read_fcsr(source)      \
 ({     \
        unsigned int __res;     \
index fe67d0b..2b9a907 100644 (file)
@@ -13,6 +13,4 @@ extern struct pglist_data *node_data[];
 
 #define NODE_DATA(nid) (node_data[(nid)])
 
-extern void setup_zero_pages(void);
-
 #endif /* _ASM_MMZONE_H_ */
index 26e8dcc..63f137c 100644 (file)
@@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define sym_to_pfn(x)          __phys_to_pfn(__pa_symbol(x))
 
 #define virt_to_pfn(kaddr)     PFN_DOWN(PHYSADDR(kaddr))
-#define virt_to_page(kaddr)    pfn_to_page(virt_to_pfn(kaddr))
+
+/*
+ * Convert a kernel virtual address to its struct page. Addresses below
+ * vm_map_base are resolved by dmw_virt_to_page(), all others by
+ * tlb_virt_to_page(). The argument is parenthesized at every use so that
+ * an expression argument (e.g. "base + off") is cast as a whole rather
+ * than having the cast bind to its first operand only.
+ */
+#define virt_to_page(kaddr)                                                            \
+({                                                                                     \
+       (likely((unsigned long)(kaddr) < vm_map_base)) ?                                \
+       dmw_virt_to_page((unsigned long)(kaddr)) :                                      \
+       tlb_virt_to_page((unsigned long)(kaddr));                                       \
+})
 
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
index 23f5b11..79470f0 100644 (file)
@@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 #endif /* __PAGETABLE_PUD_FOLDED */
 
+extern pte_t * __init populate_kernel_pte(unsigned long addr);
 #endif /* _ASM_PGALLOC_H */
index 06963a1..29d9b12 100644 (file)
@@ -70,12 +70,9 @@ struct vm_area_struct;
  * for zero-mapped memory areas etc..
  */
 
-extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
-#define ZERO_PAGE(vaddr) \
-       (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-#define __HAVE_COLOR_ZERO_PAGE
+#define ZERO_PAGE(vaddr)       virt_to_page(empty_zero_page)
 
 /*
  * TLB refill handlers may also map the vmalloc area into xkvrange.
@@ -85,14 +82,30 @@ extern unsigned long zero_page_mask;
 #define MODULES_VADDR  (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
 #define MODULES_END    (MODULES_VADDR + SZ_256M)
 
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_SIZE       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
+#else
+#define KFENCE_AREA_SIZE       0
+#endif
+
 #define VMALLOC_START  MODULES_END
+
+#ifndef CONFIG_KASAN
 #define VMALLOC_END    \
        (vm_map_base +  \
-        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#else
+#define VMALLOC_END    \
+       (vm_map_base +  \
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#endif
 
 #define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
 #define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
 
+#define KFENCE_AREA_START      (VMEMMAP_END + 1)
+#define KFENCE_AREA_END                (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1)
+
 #define pte_ERROR(e) \
        pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 extern pgd_t swapper_pg_dir[];
 extern pgd_t invalid_pg_dir[];
 
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#define pmd_leaf(pmd)          ((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud)          ((pud_val(pud) & _PAGE_HUGE) != 0)
+
 /*
  * We provide our own get_unmapped area to cope with the virtual aliasing
  * constraints placed on us by the cache architecture.
index 636e1c6..c3bc44b 100644 (file)
@@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32)
 BUILD_FPR_ACCESS(64)
 
 struct loongarch_fpu {
-       unsigned int    fcsr;
        uint64_t        fcc;    /* 8x8 */
+       uint32_t        fcsr;
+       uint32_t        ftop;
        union fpureg    fpr[NUM_FPU_REGS];
 };
 
+/*
+ * Per-thread LBT state: four scratch registers plus the eflags register.
+ * This is the type that _save_lbt() and _restore_lbt() take a pointer to.
+ */
+struct loongarch_lbt {
+       /* Scratch registers */
+       unsigned long scr0;
+       unsigned long scr1;
+       unsigned long scr2;
+       unsigned long scr3;
+       /* Eflags register */
+       unsigned long eflags;
+};
+
 #define INIT_CPUMASK { \
        {0,} \
 }
@@ -113,15 +124,6 @@ struct thread_struct {
        unsigned long csr_ecfg;
        unsigned long csr_badvaddr;     /* Last user fault */
 
-       /* Scratch registers */
-       unsigned long scr0;
-       unsigned long scr1;
-       unsigned long scr2;
-       unsigned long scr3;
-
-       /* Eflags register */
-       unsigned long eflags;
-
        /* Other stuff associated with the thread. */
        unsigned long trap_nr;
        unsigned long error_code;
@@ -133,6 +135,7 @@ struct thread_struct {
         * context because they are conditionally copied at fork().
         */
        struct loongarch_fpu fpu FPU_ALIGN;
+       struct loongarch_lbt lbt; /* Also conditionally copied */
 
        /* Hardware breakpoints pinned to this task. */
        struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
@@ -174,8 +177,9 @@ struct thread_struct {
         * FPU & vector registers                               \
         */                                                     \
        .fpu                    = {                             \
-               .fcsr           = 0,                            \
                .fcc            = 0,                            \
+               .fcsr           = 0,                            \
+               .ftop           = 0,                            \
                .fpr            = {{{0,},},},                   \
        },                                                      \
        .hbp_break              = {0},                          \
index be05c0e..a0bc159 100644 (file)
@@ -7,6 +7,7 @@
 #define _LOONGARCH_SETUP_H
 
 #include <linux/types.h>
+#include <asm/sections.h>
 #include <uapi/asm/setup.h>
 
 #define VECSIZE 0x200
@@ -33,8 +34,13 @@ extern long __la_abs_end;
 extern long __rela_dyn_begin;
 extern long __rela_dyn_end;
 
-extern void * __init relocate_kernel(void);
+extern unsigned long __init relocate_kernel(void);
 
 #endif
 
+/*
+ * Return the distance between the address of _text and
+ * VMLINUX_LOAD_ADDRESS.
+ */
+static inline unsigned long kaslr_offset(void)
+{
+       unsigned long text_addr = (unsigned long)&_text;
+
+       return text_addr - VMLINUX_LOAD_ADDRESS;
+}
+
 #endif /* __SETUP_H */
index 7df80e6..4fb1e64 100644 (file)
        cfi_st  u0, PT_R21, \docfi
        csrrd   u0, PERCPU_BASE_KS
 9:
+#ifdef CONFIG_KGDB
+       li.w    t0, CSR_CRMD_WE
+       csrxchg t0, t0, LOONGARCH_CSR_CRMD
+#endif
        .endm
 
        .macro  SAVE_ALL docfi=0
index 7b29cc9..5bb5a90 100644 (file)
@@ -7,11 +7,31 @@
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, size_t __count);
+extern void *__memset(void *__s, int __c, size_t __count);
 
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#define memset(s, c, n) __memset(s, c, n)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
 
 #endif /* _ASM_STRING_H */
index 24e3094..5b225af 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 
 struct task_struct;
 
@@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 #define switch_to(prev, next, last)                                            \
 do {                                                                           \
        lose_fpu_inatomic(1, prev);                                             \
+       lose_lbt_inatomic(1, prev);                                             \
        hw_breakpoint_thread_switch(next);                                      \
        (last) = __switch_to(prev, next, task_thread_info(next),                \
                 __builtin_return_address(0), __builtin_frame_address(0));      \
index 1a3354c..8cb653d 100644 (file)
@@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define TIF_SINGLESTEP         16      /* Single Step */
 #define TIF_LSX_CTX_LIVE       17      /* LSX context must be preserved */
 #define TIF_LASX_CTX_LIVE      18      /* LASX context must be preserved */
+#define TIF_USEDLBT            19      /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE       20      /* LBT context must be preserved */
 
 #define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
@@ -101,6 +103,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define _TIF_SINGLESTEP                (1<<TIF_SINGLESTEP)
 #define _TIF_LSX_CTX_LIVE      (1<<TIF_LSX_CTX_LIVE)
 #define _TIF_LASX_CTX_LIVE     (1<<TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT           (1<<TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE      (1<<TIF_LBT_CTX_LIVE)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
new file mode 100644 (file)
index 0000000..12467ff
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_H
+#define _ASM_LOONGARCH_XOR_H
+
+#include <asm/cpu-features.h>
+#include <asm/xor_simd.h>
+
+#ifdef CONFIG_CPU_HAS_LSX
+/* XOR template wired to the LSX routines declared in <asm/xor_simd.h>. */
+static struct xor_block_template xor_block_lsx = {
+       .name = "lsx",
+       .do_2 = xor_lsx_2,
+       .do_3 = xor_lsx_3,
+       .do_4 = xor_lsx_4,
+       .do_5 = xor_lsx_5,
+};
+
+/* Hand the LSX template to xor_speed(), but only if cpu_has_lsx is true. */
+#define XOR_SPEED_LSX()                                        \
+       do {                                            \
+               if (cpu_has_lsx)                        \
+                       xor_speed(&xor_block_lsx);      \
+       } while (0)
+#else /* CONFIG_CPU_HAS_LSX */
+/* Expands to nothing when LSX support is not configured. */
+#define XOR_SPEED_LSX()
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+/* XOR template wired to the LASX routines declared in <asm/xor_simd.h>. */
+static struct xor_block_template xor_block_lasx = {
+       .name = "lasx",
+       .do_2 = xor_lasx_2,
+       .do_3 = xor_lasx_3,
+       .do_4 = xor_lasx_4,
+       .do_5 = xor_lasx_5,
+};
+
+/* Hand the LASX template to xor_speed(), but only if cpu_has_lasx is true. */
+#define XOR_SPEED_LASX()                                       \
+       do {                                                    \
+               if (cpu_has_lasx)                               \
+                       xor_speed(&xor_block_lasx);             \
+       } while (0)
+#else /* CONFIG_CPU_HAS_LASX */
+/* Expands to nothing when LASX support is not configured. */
+#define XOR_SPEED_LASX()
+#endif /* CONFIG_CPU_HAS_LASX */
+
+/*
+ * For grins, also test the generic routines.
+ *
+ * More importantly: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+#include <asm-generic/xor.h>
+
+/*
+ * Drop any earlier definition of XOR_TRY_TEMPLATES and define one that
+ * passes the four generic templates to xor_speed(), followed by the LSX and
+ * LASX templates (each of which expands to nothing when not configured).
+ */
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES                              \
+do {                                                   \
+       xor_speed(&xor_block_8regs);                    \
+       xor_speed(&xor_block_8regs_p);                  \
+       xor_speed(&xor_block_32regs);                   \
+       xor_speed(&xor_block_32regs_p);                 \
+       XOR_SPEED_LSX();                                \
+       XOR_SPEED_LASX();                               \
+} while (0)
+
+#endif /* _ASM_LOONGARCH_XOR_H */
diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h
new file mode 100644 (file)
index 0000000..471b963
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_SIMD_H
+#define _ASM_LOONGARCH_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2);
+void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4);
+void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2);
+void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4);
+void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
index 06e3be5..ac915f8 100644 (file)
@@ -56,6 +56,12 @@ struct user_lasx_state {
        uint64_t vregs[32*4];
 };
 
+struct user_lbt_state {
+       uint64_t scr[4];
+       uint32_t eflags;
+       uint32_t ftop;
+};
+
 struct user_watch_state {
        uint64_t dbg_info;
        struct {
index 4cd7d16..6c22f61 100644 (file)
@@ -59,4 +59,14 @@ struct lasx_context {
        __u32   fcsr;
 };
 
+/* LBT context */
+#define LBT_CTX_MAGIC          0x42540001
+#define LBT_CTX_ALIGN          8
+struct lbt_context {
+       __u64   regs[4];
+       __u32   eflags;
+       __u32   ftop;
+};
+
+
 #endif /* _UAPI_ASM_SIGCONTEXT_H */
index 8e279f0..c56ea0b 100644 (file)
@@ -15,6 +15,8 @@ obj-$(CONFIG_EFI)             += efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)      += fpu.o kfpu.o
 
+obj-$(CONFIG_CPU_HAS_LBT)      += lbt.o
+
 obj-$(CONFIG_ARCH_STRICT_ALIGN)        += unaligned.o
 
 ifdef CONFIG_FUNCTION_TRACER
@@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER
   CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
 endif
 
+KASAN_SANITIZE_efi.o := n
+KASAN_SANITIZE_cpu-probe.o := n
+KASAN_SANITIZE_traps.o := n
+KASAN_SANITIZE_smp.o := n
+KASAN_SANITIZE_vdso.o := n
+
 obj-$(CONFIG_MODULES)          += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 
@@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_regs.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 
+obj-$(CONFIG_KGDB)             += kgdb.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
 obj-$(CONFIG_RETHOOK)          += rethook.o rethook_trampoline.o
 obj-$(CONFIG_UPROBES)          += uprobes.o
index 505e4bf..8da0726 100644 (file)
@@ -118,13 +118,6 @@ void output_thread_defines(void)
        OFFSET(THREAD_CSRECFG, task_struct,
               thread.csr_ecfg);
 
-       OFFSET(THREAD_SCR0, task_struct, thread.scr0);
-       OFFSET(THREAD_SCR1, task_struct, thread.scr1);
-       OFFSET(THREAD_SCR2, task_struct, thread.scr2);
-       OFFSET(THREAD_SCR3, task_struct, thread.scr3);
-
-       OFFSET(THREAD_EFLAGS, task_struct, thread.eflags);
-
        OFFSET(THREAD_FPU, task_struct, thread.fpu);
 
        OFFSET(THREAD_BVADDR, task_struct, \
@@ -172,6 +165,17 @@ void output_thread_fpu_defines(void)
 
        OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
        OFFSET(THREAD_FCC,  loongarch_fpu, fcc);
+       OFFSET(THREAD_FTOP, loongarch_fpu, ftop);
+       BLANK();
+}
+
+void output_thread_lbt_defines(void)
+{
+       OFFSET(THREAD_SCR0,  loongarch_lbt, scr0);
+       OFFSET(THREAD_SCR1,  loongarch_lbt, scr1);
+       OFFSET(THREAD_SCR2,  loongarch_lbt, scr2);
+       OFFSET(THREAD_SCR3,  loongarch_lbt, scr3);
+       OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags);
        BLANK();
 }
 
index e925579..5532081 100644 (file)
@@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
                c->options |= LOONGARCH_CPU_LVZ;
                elf_hwcap |= HWCAP_LOONGARCH_LVZ;
        }
+#ifdef CONFIG_CPU_HAS_LBT
+       if (config & CPUCFG2_X86BT) {
+               c->options |= LOONGARCH_CPU_LBT_X86;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_X86;
+       }
+       if (config & CPUCFG2_ARMBT) {
+               c->options |= LOONGARCH_CPU_LBT_ARM;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM;
+       }
+       if (config & CPUCFG2_MIPSBT) {
+               c->options |= LOONGARCH_CPU_LBT_MIPS;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS;
+       }
+#endif
 
        config = read_cpucfg(LOONGARCH_CPUCFG6);
        if (config & CPUCFG6_PMP)
index d737e3c..65518bb 100644 (file)
@@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall)
 
        SAVE_STATIC
 
+#ifdef CONFIG_KGDB
+       li.w            t1, CSR_CRMD_WE
+       csrxchg         t1, t1, LOONGARCH_CSR_CRMD
+#endif
+
        move            u0, t0
        li.d            tp, ~_THREAD_MASK
        and             tp, tp, sp
index 501094a..d53ab10 100644 (file)
@@ -22,7 +22,7 @@
 
        .macro  EX insn, reg, src, offs
 .ex\@: \insn   \reg, \src, \offs
-       _asm_extable .ex\@, fault
+       _asm_extable .ex\@, .L_fpu_fault
        .endm
 
        .macro sc_save_fp base
        .macro sc_save_fcsr base, tmp0
        movfcsr2gr      \tmp0, fcsr0
        EX      st.w    \tmp0, \base, 0
+#if defined(CONFIG_CPU_HAS_LBT)
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp0, \tmp0, FPU_CSR_TM
+       beqz            \tmp0, 1f
+       x86clrtm
+1:
+#endif
        .endm
 
        .macro sc_restore_fcsr base, tmp0
@@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp)
  */
 SYM_FUNC_START(_restore_fp)
        fpu_restore_double      a0 t1           # clobbers t1
-       fpu_restore_csr         a0 t1
+       fpu_restore_csr         a0 t1 t2
        fpu_restore_cc          a0 t1 t2        # clobbers t1, t2
        jr                      ra
 SYM_FUNC_END(_restore_fp)
@@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context)
        jr      ra
 SYM_FUNC_END(_restore_lasx_context)
 
-SYM_FUNC_START(fault)
+.L_fpu_fault:
        li.w    a0, -EFAULT                             # failure
        jr      ra
-SYM_FUNC_END(fault)
index 5e828a8..53b883d 100644 (file)
@@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry)                        # kernel entry point
        PTR_LI          sp, (_THREAD_SIZE - PT_SIZE)
        PTR_ADD         sp, sp, tp
        set_saved_sp    sp, t0, t1
-#endif
 
-       /* relocate_kernel() returns the new kernel entry point */
-       jr              a0
-       ASM_BUG()
+       /* Jump to the new kernel: new_pc = current_pc + random_offset */
+       pcaddi          t0, 0
+       add.d           t0, t0, a0
+       jirl            zero, t0, 0xc
+#endif /* CONFIG_RANDOMIZE_BASE */
+
+#endif /* CONFIG_RELOCATABLE */
 
+#ifdef CONFIG_KASAN
+       bl              kasan_early_init
 #endif
 
        bl              start_kernel
index 5c46ae8..ec5b28e 100644 (file)
@@ -8,19 +8,40 @@
 #include <asm/fpu.h>
 #include <asm/smp.h>
 
+static unsigned int euen_mask = CSR_EUEN_FPEN;
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
+ */
 static DEFINE_PER_CPU(bool, in_kernel_fpu);
+static DEFINE_PER_CPU(unsigned int, euen_current);
 
 void kernel_fpu_begin(void)
 {
+       unsigned int *euen_curr;
+
        preempt_disable();
 
        WARN_ON(this_cpu_read(in_kernel_fpu));
 
        this_cpu_write(in_kernel_fpu, true);
+       euen_curr = this_cpu_ptr(&euen_current);
 
-       if (!is_fpu_owner())
-               enable_fpu();
+       *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+       if (*euen_curr & CSR_EUEN_LASXEN)
+               _save_lasx(&current->thread.fpu);
+       else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       if (*euen_curr & CSR_EUEN_LSXEN)
+               _save_lsx(&current->thread.fpu);
        else
+#endif
+       if (*euen_curr & CSR_EUEN_FPEN)
                _save_fp(&current->thread.fpu);
 
        write_fcsr(LOONGARCH_FCSR0, 0);
@@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
+       unsigned int *euen_curr;
+
        WARN_ON(!this_cpu_read(in_kernel_fpu));
 
-       if (!is_fpu_owner())
-               disable_fpu();
+       euen_curr = this_cpu_ptr(&euen_current);
+
+#ifdef CONFIG_CPU_HAS_LASX
+       if (*euen_curr & CSR_EUEN_LASXEN)
+               _restore_lasx(&current->thread.fpu);
        else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       if (*euen_curr & CSR_EUEN_LSXEN)
+               _restore_lsx(&current->thread.fpu);
+       else
+#endif
+       if (*euen_curr & CSR_EUEN_FPEN)
                _restore_fp(&current->thread.fpu);
 
+       *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
+
        this_cpu_write(in_kernel_fpu, false);
 
        preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+/*
+ * Extend euen_mask (initially just CSR_EUEN_FPEN) with the LSX and LASX
+ * enable bits when the CPU reports those features. Registered as an
+ * arch_initcall; always returns 0.
+ */
+static int __init init_euen_mask(void)
+{
+       if (cpu_has_lsx)
+               euen_mask |= CSR_EUEN_LSXEN;
+
+       if (cpu_has_lasx)
+               euen_mask |= CSR_EUEN_LASXEN;
+
+       return 0;
+}
+arch_initcall(init_euen_mask);
diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c
new file mode 100644 (file)
index 0000000..445c452
--- /dev/null
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch KGDB support
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/processor.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/inst.h>
+#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+
+int kgdb_watch_activated;
+static unsigned int stepped_opcode;
+static unsigned long stepped_address;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+       { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
+       { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
+       { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
+       { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
+       { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
+       { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
+       { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
+       { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
+       { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
+       { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
+       { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
+       { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
+       { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
+       { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
+       { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
+       { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
+       { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
+       { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
+       { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
+       { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
+       { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
+       { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
+       { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
+       { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
+       { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
+       { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
+       { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
+       { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
+       { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
+       { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
+       { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
+       { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
+       { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
+       { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
+       { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
+       { "f0", GDB_SIZEOF_REG, 0 },
+       { "f1", GDB_SIZEOF_REG, 1 },
+       { "f2", GDB_SIZEOF_REG, 2 },
+       { "f3", GDB_SIZEOF_REG, 3 },
+       { "f4", GDB_SIZEOF_REG, 4 },
+       { "f5", GDB_SIZEOF_REG, 5 },
+       { "f6", GDB_SIZEOF_REG, 6 },
+       { "f7", GDB_SIZEOF_REG, 7 },
+       { "f8", GDB_SIZEOF_REG, 8 },
+       { "f9", GDB_SIZEOF_REG, 9 },
+       { "f10", GDB_SIZEOF_REG, 10 },
+       { "f11", GDB_SIZEOF_REG, 11 },
+       { "f12", GDB_SIZEOF_REG, 12 },
+       { "f13", GDB_SIZEOF_REG, 13 },
+       { "f14", GDB_SIZEOF_REG, 14 },
+       { "f15", GDB_SIZEOF_REG, 15 },
+       { "f16", GDB_SIZEOF_REG, 16 },
+       { "f17", GDB_SIZEOF_REG, 17 },
+       { "f18", GDB_SIZEOF_REG, 18 },
+       { "f19", GDB_SIZEOF_REG, 19 },
+       { "f20", GDB_SIZEOF_REG, 20 },
+       { "f21", GDB_SIZEOF_REG, 21 },
+       { "f22", GDB_SIZEOF_REG, 22 },
+       { "f23", GDB_SIZEOF_REG, 23 },
+       { "f24", GDB_SIZEOF_REG, 24 },
+       { "f25", GDB_SIZEOF_REG, 25 },
+       { "f26", GDB_SIZEOF_REG, 26 },
+       { "f27", GDB_SIZEOF_REG, 27 },
+       { "f28", GDB_SIZEOF_REG, 28 },
+       { "f29", GDB_SIZEOF_REG, 29 },
+       { "f30", GDB_SIZEOF_REG, 30 },
+       { "f31", GDB_SIZEOF_REG, 31 },
+       { "fcc0", 1, 0 },
+       { "fcc1", 1, 1 },
+       { "fcc2", 1, 2 },
+       { "fcc3", 1, 3 },
+       { "fcc4", 1, 4 },
+       { "fcc5", 1, 5 },
+       { "fcc6", 1, 6 },
+       { "fcc7", 1, 7 },
+       { "fcsr", 4, 0 },
+};
+
+/*
+ * Copy the value of debugger register @regno into @mem.
+ *
+ * Registers up to DBG_PT_REGS_END are read from @regs, using the table
+ * offset as a byte offset into struct pt_regs. For the remaining
+ * (floating-point) registers the table offset is an index into
+ * thread.fpu.fpr[] (f0-f31) or a byte offset into thread.fpu.fcc (fcc0-fcc7).
+ * FP registers are only copied when CSR_EUEN_FPEN is set in regs->csr_euen;
+ * otherwise @mem is left untouched.
+ *
+ * Returns the register name, or NULL when @regno is out of range.
+ */
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+       int reg_offset, reg_size;
+
+       if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+               return NULL;
+
+       reg_offset = dbg_reg_def[regno].offset;
+       reg_size = dbg_reg_def[regno].size;
+
+       /* An offset of -1 means there is nothing to copy; only the name is returned */
+       if (reg_offset == -1)
+               goto out;
+
+       /* Handle general-purpose/orig_a0/pc/badv registers */
+       if (regno <= DBG_PT_REGS_END) {
+               memcpy(mem, (void *)regs + reg_offset, reg_size);
+               goto out;
+       }
+
+       if (!(regs->csr_euen & CSR_EUEN_FPEN))
+               goto out;
+
+       /* Presumably refreshes current->thread.fpu from the live FP registers */
+       save_fp(current);
+
+       /* Handle FP registers */
+       switch (regno) {
+       case DBG_FCSR:                          /* Process the fcsr */
+               memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size);
+               break;
+       case DBG_FCC_BASE ... DBG_FCC_END:      /* Process the fcc */
+               memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size);
+               break;
+       case DBG_FPR_BASE ... DBG_FPR_END:      /* Process the fpr */
+               memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size);
+               break;
+       default:
+               break;
+       }
+
+out:
+       return dbg_reg_def[regno].name;
+}
+
+/*
+ * Store the value at @mem into debugger register @regno.
+ *
+ * Registers up to DBG_PT_REGS_END are written into @regs at the table's
+ * byte offset. FP registers are written into current->thread.fpu and then
+ * restore_fp(current) is called; they are silently skipped when
+ * CSR_EUEN_FPEN is clear in regs->csr_euen.
+ *
+ * Returns -EINVAL for an out-of-range @regno, 0 otherwise.
+ *
+ * NOTE(review): unlike dbg_get_reg(), no save_fp() precedes the update, so
+ * restore_fp() presumably reloads every FP register from whatever
+ * thread.fpu currently holds - confirm that copy is always up to date here.
+ */
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+       int reg_offset, reg_size;
+
+       if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+               return -EINVAL;
+
+       reg_offset = dbg_reg_def[regno].offset;
+       reg_size = dbg_reg_def[regno].size;
+
+       /* An offset of -1 means there is nothing to write */
+       if (reg_offset == -1)
+               return 0;
+
+       /* Handle general-purpose/orig_a0/pc/badv registers */
+       if (regno <= DBG_PT_REGS_END) {
+               memcpy((void *)regs + reg_offset, mem, reg_size);
+               return 0;
+       }
+
+       if (!(regs->csr_euen & CSR_EUEN_FPEN))
+               return 0;
+
+       /* Handle FP registers */
+       switch (regno) {
+       case DBG_FCSR:                          /* Process the fcsr */
+               memcpy((void *)&current->thread.fpu.fcsr, mem, reg_size);
+               break;
+       case DBG_FCC_BASE ... DBG_FCC_END:      /* Process the fcc */
+               memcpy((void *)&current->thread.fpu.fcc + reg_offset, mem, reg_size);
+               break;
+       case DBG_FPR_BASE ... DBG_FPR_END:      /* Process the fpr */
+               memcpy((void *)&current->thread.fpu.fpr[reg_offset], mem, reg_size);
+               break;
+       default:
+               break;
+       }
+
+       restore_fp(current);
+
+       return 0;
+}
+
+/*
+ * Similar to regs_to_gdb_regs() except that the process is sleeping and so
+ * we may not be able to get all the info.
+ *
+ * Only RA, TP, SP, S0-S8 and PC are filled in from @p; every other entry
+ * of @gdb_regs is left at zero.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+       /* Initialize to zero */
+       memset((char *)gdb_regs, 0, NUMREGBYTES);
+
+       gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01;
+       gdb_regs[DBG_LOONGARCH_TP] = (long)p;
+       gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03;
+
+       /* S0 - S8 */
+       gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23;
+       gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24;
+       gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25;
+       gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26;
+       gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27;
+       gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28;
+       gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29;
+       gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30;
+       gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31;
+
+       /*
+        * PC uses the saved return address (RA), i.e. the point right after
+        * the return from __switch_to().
+        */
+       gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01;
+}
+
+/* Set the resume address: the pc lives in the csr_era slot of @regs. */
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+       regs->csr_era = pc;
+}
+
+/*
+ * Emit the compiled-in breakpoint: a nop followed by a `break 2`
+ * instruction that carries the global label kgdb_breakinst, so the trap
+ * address can be compared against &kgdb_breakinst.
+ */
+void arch_kgdb_breakpoint(void)
+{
+       __asm__ __volatile__ (                  \
+               ".globl kgdb_breakinst\n\t"     \
+               "nop\n"                         \
+               "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */
+}
+
+/*
+ * Calls linux_debug_hook before the kernel dies. If KGDB is enabled,
+ * then try to fall into the debugger.
+ *
+ * Returns NOTIFY_DONE for user-mode events, when no kgdb I/O module is
+ * registered, or when kgdb_handle_exception() returns non-zero; returns
+ * NOTIFY_STOP once the debugger has handled the event.
+ */
+static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+       struct die_args *args = (struct die_args *)ptr;
+       struct pt_regs *regs = args->regs;
+
+       /* Userspace events, ignore. */
+       if (user_mode(regs))
+               return NOTIFY_DONE;
+
+       if (!kgdb_io_module_registered)
+               return NOTIFY_DONE;
+
+       /* kgdb_active != -1: some CPU already owns the debugger */
+       if (atomic_read(&kgdb_active) != -1)
+               kgdb_nmicallback(smp_processor_id(), regs);
+
+       if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
+               return NOTIFY_DONE;
+
+       /* Step over the compiled-in break so it is not hit again on resume */
+       if (atomic_read(&kgdb_setting_breakpoint))
+               if (regs->csr_era == (unsigned long)&kgdb_breakinst)
+                       regs->csr_era += LOONGARCH_INSN_SIZE;
+
+       return NOTIFY_STOP;
+}
+
+/*
+ * Entry point for a BRK_KDB break: wrap @regs in a die_args record and pass
+ * it to kgdb_loongarch_notify() as a DIE_TRAP event. Returns true only when
+ * the notifier answered NOTIFY_STOP.
+ */
+bool kgdb_breakpoint_handler(struct pt_regs *regs)
+{
+       int verdict;
+       struct die_args trap_info = {
+               .regs   = regs,
+               .str    = "Break",
+               .err    = BRK_KDB,
+               .trapnr = read_csr_excode(),
+               .signr  = SIGTRAP,
+       };
+
+       verdict = kgdb_loongarch_notify(NULL, DIE_TRAP, &trap_info);
+
+       return verdict == NOTIFY_STOP;
+}
+
+/* Die-chain hook; registered in kgdb_arch_init(), removed in kgdb_arch_exit(). */
+static struct notifier_block kgdb_notifier = {
+       .notifier_call = kgdb_loongarch_notify,
+};
+
+/*
+ * Parse the optional hex address that follows the command character in
+ * @remcom_in_buffer; if one is present, make it the resume pc.
+ */
+static inline void kgdb_arch_update_addr(struct pt_regs *regs,
+                                        char *remcom_in_buffer)
+{
+       unsigned long resume_pc;
+       char *cursor = remcom_in_buffer + 1;    /* skip the command byte */
+
+       if (!kgdb_hex2long(&cursor, &resume_pc))
+               return;
+
+       regs->csr_era = resume_pc;
+}
+
+/*
+ * Calculate the address execution will reach after the instruction at the
+ * current pc, so that do_single_step() can plant a break there.
+ *
+ * The instruction is decoded as each branch format in turn (26-bit,
+ * 21-bit and 16-bit immediates); anything that is not a recognised branch
+ * falls through with *next_addr = pc + LOONGARCH_INSN_SIZE.
+ *
+ * Returns 0 on success, -EINVAL if the pc is not 4-byte aligned.
+ */
+static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
+{
+       /*
+        * dbg_get_reg() leaves its buffer untouched when FP is disabled, so
+        * start from a defined value instead of reading stack garbage below.
+        */
+       char cj_val = 0;
+       unsigned int si, si_l, si_h, rd, rj, cj;
+       unsigned long pc = instruction_pointer(regs);
+       union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+       if (pc & 3) {
+               pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+               return -EINVAL;
+       }
+
+       /* Default: fall through to the next sequential instruction */
+       *next_addr = pc + LOONGARCH_INSN_SIZE;
+
+       /* Unconditional branches with a 26-bit immediate */
+       si_h = ip->reg0i26_format.immediate_h;
+       si_l = ip->reg0i26_format.immediate_l;
+       switch (ip->reg0i26_format.opcode) {
+       case b_op:
+               *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+               return 0;
+       case bl_op:
+               *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+               regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+               return 0;
+       }
+
+       /* Compare-with-zero branches with a 21-bit immediate */
+       rj = ip->reg1i21_format.rj;
+       cj = (rj & 0x07) + DBG_FCC_BASE;
+       si_l = ip->reg1i21_format.immediate_l;
+       si_h = ip->reg1i21_format.immediate_h;
+       dbg_get_reg(cj, &cj_val, regs);
+       switch (ip->reg1i21_format.opcode) {
+       case beqz_op:
+               if (regs->regs[rj] == 0)
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       case bnez_op:
+               if (regs->regs[rj] != 0)
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       case bceqz_op: /* bceqz_op = bcnez_op */
+               if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       }
+
+       /* Two-register branches and jirl with a 16-bit immediate */
+       rj = ip->reg2i16_format.rj;
+       rd = ip->reg2i16_format.rd;
+       si = ip->reg2i16_format.immediate;
+       switch (ip->reg2i16_format.opcode) {
+       case beq_op:
+               if (regs->regs[rj] == regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bne_op:
+               if (regs->regs[rj] != regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case blt_op:
+               if ((long)regs->regs[rj] < (long)regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bge_op:
+               if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bltu_op:
+               if (regs->regs[rj] < regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bgeu_op:
+               if (regs->regs[rj] >= regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case jirl_op:
+               /*
+                * Read the base register before touching the link register:
+                * with rd == rj (e.g. "jirl ra, ra, 0") writing rd first
+                * would destroy the jump base, giving a wrong target here
+                * and a corrupted rj for the instruction when it really
+                * executes. In that case leave the register alone; the
+                * instruction writes the link value itself.
+                */
+               *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17);
+               if (rd != rj)
+                       regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+               return 0;
+       }
+
+       return 0;
+}
+
+/*
+ * Arm a software single step: work out where the current instruction will
+ * go, remember the opcode found there in stepped_opcode/stepped_address,
+ * and overwrite it with the kgdb break instruction.
+ *
+ * Returns 0 on success or the error from get_step_address(),
+ * get_kernel_nofault() or copy_to_kernel_nofault().
+ */
+static int do_single_step(struct pt_regs *regs)
+{
+       int error = 0;
+       unsigned long addr = 0; /* Determine where the target instruction will send us to */
+
+       error = get_step_address(regs, &addr);
+       if (error)
+               return error;
+
+       /* Store the opcode in the stepped address */
+       error = get_kernel_nofault(stepped_opcode, (void *)addr);
+       if (error)
+               return error;
+
+       stepped_address = addr;
+
+       /* Replace the opcode with the break instruction */
+       error = copy_to_kernel_nofault((void *)stepped_address,
+                                      arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+       flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
+
+       if (error) {
+               /* Nothing was patched: forget the saved opcode so undo is a no-op */
+               stepped_opcode = 0;
+               stepped_address = 0;
+       } else {
+               kgdb_single_step = 1;
+               atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+       }
+
+       return error;
+}
+
+/*
+ * Undo a single step: if an opcode was saved by do_single_step(), write it
+ * back over the temporary break, then clear all single-step state.
+ */
+static void undo_single_step(struct pt_regs *regs)
+{
+       /* A zero stepped_opcode is treated as "no step pending" */
+       if (stepped_opcode) {
+               copy_to_kernel_nofault((void *)stepped_address,
+                                      (void *)&stepped_opcode, BREAK_INSTR_SIZE);
+               flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE);
+       }
+
+       stepped_opcode = 0;
+       stepped_address = 0;
+       kgdb_single_step = 0;
+       atomic_set(&kgdb_cpu_doing_single_step, -1);
+}
+
+/*
+ * Handle the architecture-specific part of a resume command found in
+ * @remcom_in_buffer[0]:
+ *   'D', 'k' - clear CSR_PRMD_PWE, then behave like 'c'
+ *   'c'      - continue, optionally at the address given in the packet
+ *   's'      - like 'c', and additionally arm a single step
+ * Any other command returns -1. Any pending single step is undone and
+ * CSR_PRMD_PWE is set in regs->csr_prmd before the command is examined.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+                              char *remcom_in_buffer, char *remcom_out_buffer,
+                              struct pt_regs *regs)
+{
+       int ret = 0;
+
+       undo_single_step(regs);
+       regs->csr_prmd |= CSR_PRMD_PWE;
+
+       switch (remcom_in_buffer[0]) {
+       case 'D':
+       case 'k':
+               regs->csr_prmd &= ~CSR_PRMD_PWE;
+               fallthrough;
+       case 'c':
+               kgdb_arch_update_addr(regs, remcom_in_buffer);
+               break;
+       case 's':
+               kgdb_arch_update_addr(regs, remcom_in_buffer);
+               ret = do_single_step(regs);
+               break;
+       default:
+               ret = -1;
+       }
+
+       return ret;
+}
+
+/* Bookkeeping for each of the LOONGARCH_MAX_BRP kgdb hardware breakpoint slots. */
+static struct hw_breakpoint {
+       unsigned int            enabled;        /* set by kgdb_set_hw_break(), cleared on removal */
+       unsigned long           addr;           /* address requested by the debugger */
+       int                     len;            /* HW_BREAKPOINT_LEN_* value */
+       int                     type;           /* HW_BREAKPOINT_X/R/W/RW value */
+       struct perf_event       * __percpu *pev;        /* per-CPU events from register_wide_hw_breakpoint() */
+} breakinfo[LOONGARCH_MAX_BRP];
+
+/*
+ * Reserve a breakpoint slot for entry @breakno on every online CPU.
+ * Returns 0 on success; on failure the slots reserved so far are released
+ * again and -1 is returned.
+ */
+static int hw_break_reserve_slot(int breakno)
+{
+       int cpu, cnt = 0;
+       struct perf_event **pevent;
+
+       for_each_online_cpu(cpu) {
+               cnt++;
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               if (dbg_reserve_bp_slot(*pevent))
+                       goto fail;
+       }
+
+       return 0;
+
+fail:
+       /*
+        * cnt includes the CPU whose reservation failed, so stop one short:
+        * only the CPUs visited before it actually hold a slot.
+        */
+       for_each_online_cpu(cpu) {
+               cnt--;
+               if (!cnt)
+                       break;
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               dbg_release_bp_slot(*pevent);
+       }
+
+       return -1;
+}
+
+/*
+ * Release the breakpoint slot of entry @breakno on every online CPU.
+ * Does nothing (and reports success) while dbg_is_early is set.
+ * Returns 0 on success, -1 as soon as one release fails.
+ */
+static int hw_break_release_slot(int breakno)
+{
+       int cpu;
+
+       if (dbg_is_early)
+               return 0;
+
+       for_each_online_cpu(cpu) {
+               struct perf_event **slot = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+
+               /* On failure the debugger is responsible for retrying the removal. */
+               if (dbg_release_bp_slot(*slot))
+                       return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Claim the first free breakinfo[] entry for a hardware breakpoint or
+ * watchpoint at @addr. @bptype selects execute/read/write/access and @len
+ * must be 1, 2, 4 or 8.
+ *
+ * Returns 0 on success, -1 when no entry is free, @bptype or @len is not
+ * supported, or the per-CPU slots cannot be reserved. The type/len fields
+ * of the chosen entry may already have been overwritten when -1 is
+ * returned, but the entry stays disabled.
+ */
+static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+       int i;
+
+       /* Find the first entry that is not in use */
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+               if (!breakinfo[i].enabled)
+                       break;
+
+       if (i == LOONGARCH_MAX_BRP)
+               return -1;
+
+       switch (bptype) {
+       case BP_HARDWARE_BREAKPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_X;
+               break;
+       case BP_READ_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_R;
+               break;
+       case BP_WRITE_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_W;
+               break;
+       case BP_ACCESS_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_RW;
+               break;
+       default:
+               return -1;
+       }
+
+       switch (len) {
+       case 1:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_1;
+               break;
+       case 2:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_2;
+               break;
+       case 4:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_4;
+               break;
+       case 8:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_8;
+               break;
+       default:
+               return -1;
+       }
+
+       breakinfo[i].addr = addr;
+       if (hw_break_reserve_slot(i)) {
+               breakinfo[i].addr = 0;
+               return -1;
+       }
+       breakinfo[i].enabled = 1;
+
+       return 0;
+}
+
+/*
+ * Drop the enabled breakinfo[] entry whose address equals @addr (@len and
+ * @bptype are not consulted). Returns 0 on success, -1 when no such entry
+ * exists or its per-CPU slots could not be released.
+ */
+static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+       int slot = 0;
+
+       while (slot < LOONGARCH_MAX_BRP) {
+               if (breakinfo[slot].enabled && breakinfo[slot].addr == addr)
+                       break;
+               slot++;
+       }
+
+       if (slot == LOONGARCH_MAX_BRP)
+               return -1;
+
+       if (hw_break_release_slot(slot) != 0) {
+               pr_err("Cannot remove hw breakpoint at %lx\n", addr);
+               return -1;
+       }
+
+       breakinfo[slot].enabled = 0;
+       return 0;
+}
+
+/*
+ * Uninstall, on the current CPU, every enabled breakpoint that is not
+ * already marked disabled, then clear CSR_CRMD_WE. The breakinfo[] entries
+ * themselves stay enabled.
+ */
+static void kgdb_disable_hw_break(struct pt_regs *regs)
+{
+       int i;
+       int cpu = raw_smp_processor_id();
+       struct perf_event *bp;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (bp->attr.disabled == 1)
+                       continue;
+
+               arch_uninstall_hw_breakpoint(bp);
+               bp->attr.disabled = 1;
+       }
+
+       /* Disable hardware debugging while we are in kgdb */
+       csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+}
+
+/*
+ * Walk all enabled breakinfo[] entries on the current CPU. An entry whose
+ * event is still installed is only uninstalled and marked disabled (it
+ * stays enabled in breakinfo[]); an entry that is already uninstalled has
+ * its slots released and is freed. Finally CSR_CRMD_WE is cleared and
+ * kgdb_watch_activated is reset.
+ */
+static void kgdb_remove_all_hw_break(void)
+{
+       int i;
+       int cpu = raw_smp_processor_id();
+       struct perf_event *bp;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (!bp->attr.disabled) {
+                       arch_uninstall_hw_breakpoint(bp);
+                       bp->attr.disabled = 1;
+                       continue;
+               }
+
+               /* The entry is freed even if releasing the slot reports an error */
+               if (hw_break_release_slot(i))
+                       pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr);
+               breakinfo[i].enabled = 0;
+       }
+
+       csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+       kgdb_watch_activated = 0;
+}
+
+/*
+ * Re-install, on the current CPU, every enabled breakpoint that is
+ * currently marked disabled, using the addr/len/type recorded in
+ * breakinfo[]. Afterwards CSR_CRMD_WE and kgdb_watch_activated reflect
+ * whether any install was attempted.
+ *
+ * NOTE(review): "activated" is set even when arch_install_hw_breakpoint()
+ * fails, and a hw_breakpoint_arch_parse() failure returns before
+ * CSR_CRMD_WE is updated - confirm both are intended.
+ */
+static void kgdb_correct_hw_break(void)
+{
+       int i, activated = 0;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               struct perf_event *bp;
+               int val;
+               int cpu = raw_smp_processor_id();
+
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (bp->attr.disabled != 1)
+                       continue;
+
+               bp->attr.bp_addr = breakinfo[i].addr;
+               bp->attr.bp_len = breakinfo[i].len;
+               bp->attr.bp_type = breakinfo[i].type;
+
+               val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp));
+               if (val)
+                       return;
+
+               val = arch_install_hw_breakpoint(bp);
+               if (!val)
+                       bp->attr.disabled = 0;
+               activated = 1;
+       }
+
+       csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+       kgdb_watch_activated = activated;
+}
+
+/*
+ * Architecture hooks handed to the kgdb core. gdb_bpt_instr is the byte
+ * pattern do_single_step() writes over the stepped-to instruction
+ * (presumably the little-endian encoding of "break 2" - confirm against
+ * the instruction encoding).
+ */
+const struct kgdb_arch arch_kgdb_ops = {
+       .gdb_bpt_instr          = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */
+       .flags                  = KGDB_HW_BREAKPOINT,
+       .set_hw_breakpoint      = kgdb_set_hw_break,
+       .remove_hw_breakpoint   = kgdb_remove_hw_break,
+       .disable_hw_break       = kgdb_disable_hw_break,
+       .remove_all_hw_break    = kgdb_remove_all_hw_break,
+       .correct_hw_break       = kgdb_correct_hw_break,
+};
+
+/* Hook kgdb into the die notifier chain; returns register_die_notifier()'s result. */
+int kgdb_arch_init(void)
+{
+       return register_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * Allocate the per-CPU perf events backing each breakinfo[] entry that
+ * does not have one yet. The events are created disabled with placeholder
+ * attributes (a 4-byte write watchpoint on kgdb_arch_init); the real
+ * address, length and type are filled in by kgdb_correct_hw_break().
+ * Stops at the first allocation failure.
+ */
+void kgdb_arch_late(void)
+{
+       int i, cpu;
+       struct perf_event_attr attr;
+       struct perf_event **pevent;
+
+       hw_breakpoint_init(&attr);
+
+       attr.bp_addr = (unsigned long)kgdb_arch_init;
+       attr.bp_len = HW_BREAKPOINT_LEN_4;
+       attr.bp_type = HW_BREAKPOINT_W;
+       attr.disabled = 1;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (breakinfo[i].pev)
+                       continue;
+
+               breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
+               if (IS_ERR((void * __force)breakinfo[i].pev)) {
+                       pr_err("kgdb: Could not allocate hw breakpoints.\n");
+                       breakinfo[i].pev = NULL;
+                       return;
+               }
+
+               /*
+                * Drop the destroy callback and give the slot back right
+                * away; slots are reserved again on demand by
+                * hw_break_reserve_slot().
+                */
+               for_each_online_cpu(cpu) {
+                       pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
+                       if (pevent[0]->destroy) {
+                               pevent[0]->destroy = NULL;
+                               release_bp_slot(*pevent);
+                       }
+               }
+       }
+}
+
+/*
+ * Tear down what kgdb_arch_late() and kgdb_arch_init() set up: unregister
+ * every allocated wide hardware breakpoint, then leave the die notifier
+ * chain.
+ */
+void kgdb_arch_exit(void)
+{
+       int slot;
+
+       for (slot = 0; slot < LOONGARCH_MAX_BRP; slot++) {
+               if (!breakinfo[slot].pev)
+                       continue;
+
+               unregister_wide_hw_breakpoint(breakinfo[slot].pev);
+               breakinfo[slot].pev = NULL;
+       }
+
+       unregister_die_notifier(&kgdb_notifier);
+}
diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
new file mode 100644 (file)
index 0000000..9c75120
--- /dev/null
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/regdef.h>
+
+#define SCR_REG_WIDTH 8
+
+/*
+ * EX insn, reg, src, offs
+ *
+ * Emit "\insn \reg, \src, \offs" under a per-expansion label (.ex\@) and add
+ * an exception-table entry for it, so that a fault on that instruction
+ * continues at .L_lbt_fault.
+ */
+       .macro  EX insn, reg, src, offs
+.ex\@: \insn   \reg, \src, \offs
+       _asm_extable .ex\@, .L_lbt_fault
+       .endm
+
+/*
+ * Save a thread's lbt context.
+ *
+ * a0: base address that the THREAD_SCR0..3 and THREAD_EFLAGS offsets are
+ *     applied to (presumably the task_struct - confirm against asm-offsets)
+ *
+ * Stores $scr0-$scr3 and the value read by x86mfflag (immediate 0x3f) at
+ * those offsets. t1 is used as scratch; no value is returned.
+ */
+SYM_FUNC_START(_save_lbt)
+       movscr2gr       t1, $scr0               # save scr
+       stptr.d         t1, a0, THREAD_SCR0
+       movscr2gr       t1, $scr1
+       stptr.d         t1, a0, THREAD_SCR1
+       movscr2gr       t1, $scr2
+       stptr.d         t1, a0, THREAD_SCR2
+       movscr2gr       t1, $scr3
+       stptr.d         t1, a0, THREAD_SCR3
+
+       x86mfflag       t1, 0x3f                # save eflags
+       stptr.d         t1, a0, THREAD_EFLAGS
+       jr              ra
+SYM_FUNC_END(_save_lbt)
+EXPORT_SYMBOL(_save_lbt)
+
+/*
+ * Restore a thread's lbt context.
+ *
+ * a0: base address that the THREAD_SCR0..3 and THREAD_EFLAGS offsets are
+ *     applied to (presumably the task_struct - confirm against asm-offsets)
+ *
+ * Loads the four saved values back into $scr0-$scr3 and writes the saved
+ * eflags value with x86mtflag (immediate 0x3f). t1 is used as scratch; no
+ * value is returned.
+ */
+SYM_FUNC_START(_restore_lbt)
+       ldptr.d         t1, a0, THREAD_SCR0     # restore scr
+       movgr2scr       $scr0, t1
+       ldptr.d         t1, a0, THREAD_SCR1
+       movgr2scr       $scr1, t1
+       ldptr.d         t1, a0, THREAD_SCR2
+       movgr2scr       $scr2, t1
+       ldptr.d         t1, a0, THREAD_SCR3
+       movgr2scr       $scr3, t1
+
+       ldptr.d         t1, a0, THREAD_EFLAGS   # restore eflags
+       x86mtflag       t1, 0x3f
+       jr              ra
+SYM_FUNC_END(_restore_lbt)
+EXPORT_SYMBOL(_restore_lbt)
+
+/*
+ * Load scr/eflag with zero.
+ *
+ * Reads no argument registers: writes the zero register to $scr0-$scr3 and
+ * to the eflags via x86mtflag (immediate 0x3f), then returns.
+ */
+SYM_FUNC_START(_init_lbt)
+       movgr2scr       $scr0, zero
+       movgr2scr       $scr1, zero
+       movgr2scr       $scr2, zero
+       movgr2scr       $scr3, zero
+
+       x86mtflag       zero, 0x3f
+       jr              ra
+SYM_FUNC_END(_init_lbt)
+
+/*
+ * Write the live LBT registers to memory, with fault handling.
+ *
+ * a0: scr   - destination for $scr0-$scr3, one SCR_REG_WIDTH-byte slot each
+ * a1: eflag - destination for the 32-bit eflags value
+ *
+ * Every store goes through EX, so a faulting access ends up in .L_lbt_fault
+ * and the function returns -EFAULT. Returns 0 in a0 on success. t1 is used
+ * as scratch.
+ */
+SYM_FUNC_START(_save_lbt_context)
+       movscr2gr       t1, $scr0               # save scr
+       EX      st.d    t1, a0, (0 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr1
+       EX      st.d    t1, a0, (1 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr2
+       EX      st.d    t1, a0, (2 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr3
+       EX      st.d    t1, a0, (3 * SCR_REG_WIDTH)
+
+       x86mfflag       t1, 0x3f                # save eflags
+       EX      st.w    t1, a1, 0
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_save_lbt_context)
+
+/*
+ * Load the LBT registers from memory, with fault handling.
+ *
+ * a0: scr   - source of four SCR_REG_WIDTH-byte values for $scr0-$scr3
+ * a1: eflag - source of the 32-bit eflags value
+ *
+ * Every load goes through EX, so a faulting access ends up in .L_lbt_fault
+ * and the function returns -EFAULT; registers loaded before the fault have
+ * already been written. Returns 0 in a0 on success. t1 is used as scratch.
+ */
+SYM_FUNC_START(_restore_lbt_context)
+       EX      ld.d    t1, a0, (0 * SCR_REG_WIDTH)     # restore scr
+       movgr2scr       $scr0, t1
+       EX      ld.d    t1, a0, (1 * SCR_REG_WIDTH)
+       movgr2scr       $scr1, t1
+       EX      ld.d    t1, a0, (2 * SCR_REG_WIDTH)
+       movgr2scr       $scr2, t1
+       EX      ld.d    t1, a0, (3 * SCR_REG_WIDTH)
+       movgr2scr       $scr3, t1
+
+       EX      ld.w    t1, a1, 0                       # restore eflags
+       x86mtflag       t1, 0x3f
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_restore_lbt_context)
+
+/*
+ * Write the current x87 top-of-stack value (x86mftop) to memory.
+ *
+ * a0: ftop - destination for the 32-bit value; the C prototype declares it
+ *            as a __user pointer
+ *
+ * The store goes through EX like the other context accessors in this file,
+ * so a faulting access returns -EFAULT via .L_lbt_fault instead of faulting
+ * without a fixup. Returns 0 in a0 on success. t1 is used as scratch.
+ */
+SYM_FUNC_START(_save_ftop_context)
+       x86mftop        t1
+       EX      st.w    t1, a0, 0
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_save_ftop_context)
+
+/*
+ * Load the x87 top-of-stack value from memory and program it (x86mttop).
+ *
+ * a0: ftop - source of the 32-bit value; the C prototype declares it as a
+ *            __user pointer
+ *
+ * The load goes through EX like the other context accessors in this file,
+ * so a faulting access returns -EFAULT via .L_lbt_fault instead of faulting
+ * without a fixup. Returns 0 in a0 on success. t1 and a0 are clobbered.
+ */
+SYM_FUNC_START(_restore_ftop_context)
+       EX      ld.w    t1, a0, 0
+       andi            t1, t1, 0x7             # keep only values 0-7
+       /*
+        * x86mttop takes its operand as an immediate, so dispatch through a
+        * table of eight entries. Each entry is two 4-byte instructions,
+        * hence the target is 1f + (t1 << 3).
+        */
+       la.pcrel        a0, 1f
+       alsl.d          a0, t1, a0, 3
+       jr              a0
+1:
+       x86mttop        0
+       b       2f
+       x86mttop        1
+       b       2f
+       x86mttop        2
+       b       2f
+       x86mttop        3
+       b       2f
+       x86mttop        4
+       b       2f
+       x86mttop        5
+       b       2f
+       x86mttop        6
+       b       2f
+       x86mttop        7                       # last entry falls through
+2:
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_restore_ftop_context)
+
+/*
+ * Common fixup target for the EX-wrapped accesses above: return -EFAULT in
+ * a0 to the caller of the faulting routine.
+ */
+.L_lbt_fault:
+       li.w            a0, -EFAULT             # failure
+       jr              ra
index 7086658..c7d33c4 100644 (file)
@@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 
 void __init pcpu_populate_pte(unsigned long addr)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
-       p4d_t *p4d = p4d_offset(pgd, addr);
-       pud_t *pud;
-       pmd_t *pmd;
-
-       if (p4d_none(*p4d)) {
-               pud_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pgd_populate(&init_mm, pgd, new);
-#ifndef __PAGETABLE_PUD_FOLDED
-               pud_init(new);
-#endif
-       }
-
-       pud = pud_offset(p4d, addr);
-       if (pud_none(*pud)) {
-               pmd_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pud_populate(&init_mm, pud, new);
-#ifndef __PAGETABLE_PMD_FOLDED
-               pmd_init(new);
-#endif
-       }
-
-       pmd = pmd_offset(pud, addr);
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pmd_populate_kernel(&init_mm, pmd, new);
-       }
+       populate_kernel_pte(addr);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -470,7 +438,6 @@ void __init mem_init(void)
 {
        high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
        memblock_free_all();
-       setup_zero_pages();     /* This comes from node 0 */
 }
 
 int pcibus_to_node(struct pci_bus *bus)
index ba457e4..3cb082e 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/elf.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
@@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
        euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
        regs->csr_euen = euen;
        lose_fpu(0);
+       lose_lbt(0);
 
        clear_thread_flag(TIF_LSX_CTX_LIVE);
        clear_thread_flag(TIF_LASX_CTX_LIVE);
+       clear_thread_flag(TIF_LBT_CTX_LIVE);
        clear_used_math();
        regs->csr_era = pc;
        regs->regs[3] = sp;
@@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        preempt_enable();
 
-       if (used_math())
-               memcpy(dst, src, sizeof(struct task_struct));
-       else
+       if (!used_math())
                memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
+       else
+               memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0));
+
+#ifdef CONFIG_CPU_HAS_LBT
+       memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt));
+#endif
 
        return 0;
 }
@@ -189,8 +196,10 @@ out:
        ptrace_hw_copy_thread(p);
        clear_tsk_thread_flag(p, TIF_USEDFPU);
        clear_tsk_thread_flag(p, TIF_USEDSIMD);
+       clear_tsk_thread_flag(p, TIF_USEDLBT);
        clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
        clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+       clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
 
        return 0;
 }
index f72adbf..c114c5e 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/loongarch.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target,
 
 #endif /* CONFIG_CPU_HAS_LSX */
 
+#ifdef CONFIG_CPU_HAS_LBT
+/*
+ * regset_get handler for the LBT regset.
+ *
+ * Emits, in order: scr0, scr1, scr2, scr3, then eflags and ftop as 32-bit
+ * values (ftop is read from thread.fpu). Returns the result of the last
+ * membuf_write() call.
+ */
+static int lbt_get(struct task_struct *target,
+                  const struct user_regset *regset,
+                  struct membuf to)
+{
+       struct loongarch_lbt *lbt = &target->thread.lbt;
+
+       membuf_write(&to, &lbt->scr0, sizeof(lbt->scr0));
+       membuf_write(&to, &lbt->scr1, sizeof(lbt->scr1));
+       membuf_write(&to, &lbt->scr2, sizeof(lbt->scr2));
+       membuf_write(&to, &lbt->scr3, sizeof(lbt->scr3));
+       membuf_write(&to, &lbt->eflags, sizeof(u32));
+
+       return membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32));
+}
+
+/*
+ * set handler for the LBT regset.
+ *
+ * Layout of the incoming data, mirroring lbt_get():
+ *   [0, scr_end)            -> thread.lbt.scr0 onwards (four registers)
+ *   [scr_end, eflags_end)   -> thread.lbt.eflags (32 bits)
+ *   [eflags_end, ftop_end)  -> thread.fpu.ftop (32 bits)
+ *
+ * Returns the OR of the three user_regset_copyin() results.
+ */
+static int lbt_set(struct task_struct *target,
+                  const struct user_regset *regset,
+                  unsigned int pos, unsigned int count,
+                  const void *kbuf, const void __user *ubuf)
+{
+       const int scr_end = 4 * sizeof(target->thread.lbt.scr0);
+       const int eflags_end = scr_end + sizeof(u32);
+       const int ftop_end = eflags_end + sizeof(u32);
+       int ret;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &target->thread.lbt.scr0,
+                                0, scr_end);
+       ret |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.lbt.eflags,
+                                 scr_end, eflags_end);
+       ret |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.fpu.ftop,
+                                 eflags_end, ftop_end);
+
+       return ret;
+}
+#endif /* CONFIG_CPU_HAS_LBT */
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 /*
@@ -802,6 +843,9 @@ enum loongarch_regset {
 #ifdef CONFIG_CPU_HAS_LASX
        REGSET_LASX,
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+       REGSET_LBT,
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        REGSET_HW_BREAK,
        REGSET_HW_WATCH,
@@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = {
                .set            = simd_set,
        },
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+       [REGSET_LBT] = {
+               .core_note_type = NT_LOONGARCH_LBT,
+               .n              = 5,
+               .size           = sizeof(u64),
+               .align          = sizeof(u64),
+               .regset_get     = lbt_get,
+               .set            = lbt_set,
+       },
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        [REGSET_HW_BREAK] = {
                .core_note_type = NT_LOONGARCH_HW_BREAK,
index 01f94d1..6c3eff9 100644 (file)
@@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o
        *new_addr = (unsigned long)reloc_offset;
 }
 
-void * __init relocate_kernel(void)
+unsigned long __init relocate_kernel(void)
 {
        unsigned long kernel_length;
        unsigned long random_offset = 0;
        void *location_new = _text; /* Default to original kernel start */
-       void *kernel_entry = start_kernel; /* Default to original kernel entry point */
        char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
 
        strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
@@ -190,9 +189,6 @@ void * __init relocate_kernel(void)
 
                reloc_offset += random_offset;
 
-               /* Return the new kernel's entry point */
-               kernel_entry = RELOCATED_KASLR(start_kernel);
-
                /* The current thread is now within the relocated kernel */
                __current_thread_info = RELOCATED_KASLR(__current_thread_info);
 
@@ -204,7 +200,7 @@ void * __init relocate_kernel(void)
 
        relocate_absolute(random_offset);
 
-       return kernel_entry;
+       return random_offset;
 }
 
 /*
index 9d830ab..7783f0a 100644 (file)
@@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        paging_init();
+
+#ifdef CONFIG_KASAN
+       kasan_init();
+#endif
 }
index ceb8993..504fdfe 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/ucontext.h>
 #include <asm/vdso.h>
 
@@ -44,6 +45,9 @@
 /* Make sure we will not lose FPU ownership */
 #define lock_fpu_owner()       ({ preempt_disable(); pagefault_disable(); })
 #define unlock_fpu_owner()     ({ pagefault_enable(); preempt_enable(); })
+/* Make sure we will not lose LBT ownership */
+#define lock_lbt_owner()       ({ preempt_disable(); pagefault_disable(); })
+#define unlock_lbt_owner()     ({ pagefault_enable(); preempt_enable(); })
 
 /* Assembly functions to move context to/from the FPU */
 extern asmlinkage int
@@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 extern asmlinkage int
 _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 
+#ifdef CONFIG_CPU_HAS_LBT
+extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _save_ftop_context(void __user *ftop);
+extern asmlinkage int _restore_ftop_context(void __user *ftop);
+#endif
+
 struct rt_sigframe {
        struct siginfo rs_info;
        struct ucontext rs_uctx;
@@ -75,6 +86,7 @@ struct extctx_layout {
        struct _ctx_layout fpu;
        struct _ctx_layout lsx;
        struct _ctx_layout lasx;
+       struct _ctx_layout lbt;
        struct _ctx_layout end;
 };
 
@@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
        return err;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+/*
+ * Copy the LBT state held in current->thread.lbt (scr0-scr3 and eflags)
+ * into the user-space lbt_context @ctx.
+ *
+ * Returns 0 on success, non-zero if any __put_user() failed.
+ */
+static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
+{
+       int err = 0;
+       /* Keep __user in the casts so sparse does not see the space dropped. */
+       uint64_t __user *regs   = (uint64_t __user *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t __user *)&ctx->eflags;
+
+       err |= __put_user(current->thread.lbt.scr0, &regs[0]);
+       err |= __put_user(current->thread.lbt.scr1, &regs[1]);
+       err |= __put_user(current->thread.lbt.scr2, &regs[2]);
+       err |= __put_user(current->thread.lbt.scr3, &regs[3]);
+       err |= __put_user(current->thread.lbt.eflags, eflags);
+
+       return err;
+}
+
+/*
+ * Copy scr0-scr3 and eflags from the user-space lbt_context @ctx into
+ * current->thread.lbt.
+ *
+ * Returns 0 on success, non-zero if any __get_user() failed.
+ */
+static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
+{
+       int err = 0;
+       /* Keep __user in the casts so sparse does not see the space dropped. */
+       uint64_t __user *regs   = (uint64_t __user *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t __user *)&ctx->eflags;
+
+       err |= __get_user(current->thread.lbt.scr0, &regs[0]);
+       err |= __get_user(current->thread.lbt.scr1, &regs[1]);
+       err |= __get_user(current->thread.lbt.scr2, &regs[2]);
+       err |= __get_user(current->thread.lbt.scr3, &regs[3]);
+       err |= __get_user(current->thread.lbt.eflags, eflags);
+
+       return err;
+}
+
+/*
+ * Store current->thread.fpu.ftop into @ctx->ftop in user space.
+ * Returns the __put_user() result.
+ */
+static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx)
+{
+       return __put_user(current->thread.fpu.ftop, &ctx->ftop);
+}
+
+/*
+ * Load @ctx->ftop from user space into current->thread.fpu.ftop.
+ * Returns the __get_user() result.
+ */
+static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx)
+{
+       return __get_user(current->thread.fpu.ftop, &ctx->ftop);
+}
+#endif
+
 /*
  * Wrappers for the assembly _{save,restore}_fp_context functions.
  */
@@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx)
        return _restore_lasx_context(regs, fcc, fcsr);
 }
 
+/*
+ * Wrappers for the assembly _{save,restore}_lbt_context functions.
+ */
+#ifdef CONFIG_CPU_HAS_LBT
+/*
+ * Split @ctx into its regs and eflags fields and pass them to the assembly
+ * helper _save_lbt_context(). Returns that helper's result.
+ */
+static int save_hw_lbt_context(struct lbt_context __user *ctx)
+{
+       /* Keep __user in the casts so sparse does not see the space dropped. */
+       uint64_t __user *regs   = (uint64_t __user *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t __user *)&ctx->eflags;
+
+       return _save_lbt_context(regs, eflags);
+}
+
+/*
+ * Split @ctx into its regs and eflags fields and pass them to the assembly
+ * helper _restore_lbt_context(). Returns that helper's result.
+ */
+static int restore_hw_lbt_context(struct lbt_context __user *ctx)
+{
+       /* Keep __user in the casts so sparse does not see the space dropped. */
+       uint64_t __user *regs   = (uint64_t __user *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t __user *)&ctx->eflags;
+
+       return _restore_lbt_context(regs, eflags);
+}
+
+/*
+ * Pass the address of @ctx->ftop to the assembly helper
+ * _save_ftop_context() and return its result.
+ */
+static int save_hw_ftop_context(struct lbt_context __user *ctx)
+{
+       return _save_ftop_context(&ctx->ftop);
+}
+
+/*
+ * Pass the address of @ctx->ftop to the assembly helper
+ * _restore_ftop_context() and return its result.
+ */
+static int restore_hw_ftop_context(struct lbt_context __user *ctx)
+{
+       return _restore_ftop_context(&ctx->ftop);
+}
+#endif
+
 static int fcsr_pending(unsigned int __user *fcsr)
 {
        int err, sig = 0;
@@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx)
        return err ?: sig;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+/*
+ * Write the LBT record (sctx_info header plus lbt_context payload) located
+ * at extctx->lbt.addr in the signal frame.
+ *
+ * Returns 0 on success, or the non-zero error from touching the user
+ * context when even that fails.
+ */
+static int protected_save_lbt_context(struct extctx_layout *extctx)
+{
+       int err = 0;
+       struct sctx_info __user *info = extctx->lbt.addr;
+       struct lbt_context __user *lbt_ctx =
+               (struct lbt_context *)get_ctx_through_ctxinfo(info);
+       uint64_t __user *regs   = (uint64_t *)&lbt_ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+       while (1) {
+               /* Preemption and page faults are off until unlock_lbt_owner(). */
+               lock_lbt_owner();
+               /* scr/eflags: from hardware if we own LBT, else from the thread copy. */
+               if (is_lbt_owner())
+                       err |= save_hw_lbt_context(lbt_ctx);
+               else
+                       err |= copy_lbt_to_sigcontext(lbt_ctx);
+               /* ftop follows FPU ownership rather than LBT ownership. */
+               if (is_fpu_owner())
+                       err |= save_hw_ftop_context(lbt_ctx);
+               else
+                       err |= copy_ftop_to_sigcontext(lbt_ctx);
+               unlock_lbt_owner();
+
+               err |= __put_user(LBT_CTX_MAGIC, &info->magic);
+               err |= __put_user(extctx->lbt.size, &info->size);
+
+               if (likely(!err))
+                       break;
+               /* Touch the LBT context and try again */
+               /* Done with page faults enabled; err is restarted from this result. */
+               err = __put_user(0, &regs[0]) | __put_user(0, eflags);
+
+               /* The user memory is really inaccessible: give up. */
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+
+/*
+ * Read the lbt_context payload of the LBT record at extctx->lbt.addr back
+ * into the hardware registers or the thread copy.
+ *
+ * Returns 0 on success, or the non-zero error from touching the user
+ * context when even that fails.
+ */
+static int protected_restore_lbt_context(struct extctx_layout *extctx)
+{
+       int err = 0, tmp __maybe_unused;
+       struct sctx_info __user *info = extctx->lbt.addr;
+       struct lbt_context __user *lbt_ctx =
+               (struct lbt_context *)get_ctx_through_ctxinfo(info);
+       uint64_t __user *regs   = (uint64_t *)&lbt_ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+       while (1) {
+               /* Preemption and page faults are off until unlock_lbt_owner(). */
+               lock_lbt_owner();
+               /* scr/eflags: into hardware if we own LBT, else into the thread copy. */
+               if (is_lbt_owner())
+                       err |= restore_hw_lbt_context(lbt_ctx);
+               else
+                       err |= copy_lbt_from_sigcontext(lbt_ctx);
+               /* ftop follows FPU ownership rather than LBT ownership. */
+               if (is_fpu_owner())
+                       err |= restore_hw_ftop_context(lbt_ctx);
+               else
+                       err |= copy_ftop_from_sigcontext(lbt_ctx);
+               unlock_lbt_owner();
+
+               if (likely(!err))
+                       break;
+               /* Touch the LBT context and try again */
+               /* Done with page faults enabled; err is restarted from this result. */
+               err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags);
+
+               /* The user memory is really inaccessible: give up. */
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+#endif
+
 static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
                            struct extctx_layout *extctx)
 {
@@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
        else if (extctx->fpu.addr)
                err |= protected_save_fpu_context(extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (extctx->lbt.addr)
+               err |= protected_save_lbt_context(extctx);
+#endif
+
        /* Set the "end" magic */
        info = (struct sctx_info *)extctx->end.addr;
        err |= __put_user(0, &info->magic);
@@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
                        extctx->lasx.addr = info;
                        break;
 
+               case LBT_CTX_MAGIC:
+                       if (size < (sizeof(struct sctx_info) +
+                                   sizeof(struct lbt_context)))
+                               goto invalid;
+                       extctx->lbt.addr = info;
+                       break;
+
                default:
                        goto invalid;
                }
@@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
        else if (extctx.fpu.addr)
                err |= protected_restore_fpu_context(&extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (extctx.lbt.addr)
+               err |= protected_restore_lbt_context(&extctx);
+#endif
+
 bad:
        return err;
 }
@@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
                          sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
        }
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (cpu_has_lbt && thread_lbt_context_live()) {
+               new_sp = extframe_alloc(extctx, &extctx->lbt,
+                         sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
+       }
+#endif
+
        return new_sp;
 }
 
index 2463d2f..92270f1 100644 (file)
@@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
        struct pt_regs dummyregs;
        struct unwind_state state;
 
-       regs = &dummyregs;
+       if (!regs) {
+               regs = &dummyregs;
 
-       if (task == current) {
-               regs->regs[3] = (unsigned long)__builtin_frame_address(0);
-               regs->csr_era = (unsigned long)__builtin_return_address(0);
-       } else {
-               regs->regs[3] = thread_saved_fp(task);
-               regs->csr_era = thread_saved_ra(task);
+               if (task == current) {
+                       regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+                       regs->csr_era = (unsigned long)__builtin_return_address(0);
+               } else {
+                       regs->regs[3] = thread_saved_fp(task);
+                       regs->csr_era = thread_saved_ra(task);
+               }
+               regs->regs[1] = 0;
        }
 
-       regs->regs[1] = 0;
        for (unwind_start(&state, task, regs);
             !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
                addr = unwind_get_return_address(&state);
index 89699db..6521477 100644 (file)
@@ -36,7 +36,9 @@
 #include <asm/break.h>
 #include <asm/cpu.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/inst.h>
+#include <asm/kgdb.h>
 #include <asm/loongarch.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
         * pertain to them.
         */
        switch (bcode) {
+       case BRK_KDB:
+               if (kgdb_breakpoint_handler(regs))
+                       goto out;
+               else
+                       break;
        case BRK_KPROBE_BP:
                if (kprobe_breakpoint_handler(regs))
                        goto out;
@@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs)
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
        pr_warn("Hardware watch point handler not implemented!\n");
 #else
+       if (kgdb_breakpoint_handler(regs))
+               goto out;
+
        if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
                int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
                unsigned long pc = instruction_pointer(regs);
@@ -966,13 +976,47 @@ out:
        irqentry_exit(regs, state);
 }
 
+/*
+ * Give the current thread a usable LBT context: initialise it on first use
+ * and mark it live, otherwise take ownership if the thread does not already
+ * own it. LBT must be enabled on return.
+ */
+static void init_restore_lbt(void)
+{
+       bool first_use = !thread_lbt_context_live();
+
+       if (first_use) {
+               /* First time LBT context user */
+               init_lbt();
+               set_thread_flag(TIF_LBT_CTX_LIVE);
+       } else if (!is_lbt_owner()) {
+               own_lbt_inatomic(1);
+       }
+
+       BUG_ON(!is_lbt_enabled());
+}
+
 asmlinkage void noinstr do_lbt(struct pt_regs *regs)
 {
        irqentry_state_t state = irqentry_enter(regs);
 
-       local_irq_enable();
-       force_sig(SIGILL);
-       local_irq_disable();
+       /*
+        * BTD (Binary Translation Disable exception) can be triggered
+        * during FP save/restore if TM (Top Mode) is on, which may
+        * cause irq_enable during 'switch_to'. To avoid this situation
+        * (including the user using 'MOVGR2GCSR' to turn on TM, which
+        * will not trigger the BTE), we need to check PRMD first.
+        */
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_enable();
+
+       if (!cpu_has_lbt) {
+               force_sig(SIGILL);
+               goto out;
+       }
+       BUG_ON(is_lbt_enabled());
+
+       preempt_disable();
+       init_restore_lbt();
+       preempt_enable();
+
+out:
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_disable();
 
        irqentry_exit(regs, state);
 }
index d60d4e0..a77bf16 100644 (file)
@@ -6,4 +6,6 @@
 lib-y  += delay.o memset.o memcpy.o memmove.o \
           clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
 
+obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
+
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
index 0790ead..be74154 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-       sub.d   a0, a2, a0
-       addi.d  a0, a0, (\to) * (-8)
-       jr      ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-       addi.d  a0, a1, -\to
-       jr      ra
-.endr
-
 SYM_FUNC_START(__clear_user)
        /*
         * Some CPUs support hardware unaligned access
@@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic)
 2:     move    a0, a1
        jr      ra
 
-       _asm_extable 1b, .L_fixup_handle_s0
+       _asm_extable 1b, 2b
 SYM_FUNC_END(__clear_user_generic)
 
 /*
@@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast)
        jr      ra
 
        /* fixup and ex_table */
-       _asm_extable 0b, .L_fixup_handle_0
-       _asm_extable 1b, .L_fixup_handle_0
-       _asm_extable 2b, .L_fixup_handle_1
-       _asm_extable 3b, .L_fixup_handle_2
-       _asm_extable 4b, .L_fixup_handle_3
-       _asm_extable 5b, .L_fixup_handle_4
-       _asm_extable 6b, .L_fixup_handle_5
-       _asm_extable 7b, .L_fixup_handle_6
-       _asm_extable 8b, .L_fixup_handle_7
-       _asm_extable 9b, .L_fixup_handle_0
-       _asm_extable 10b, .L_fixup_handle_1
-       _asm_extable 11b, .L_fixup_handle_2
-       _asm_extable 12b, .L_fixup_handle_3
-       _asm_extable 13b, .L_fixup_handle_0
-       _asm_extable 14b, .L_fixup_handle_1
-       _asm_extable 15b, .L_fixup_handle_0
-       _asm_extable 16b, .L_fixup_handle_0
-       _asm_extable 17b, .L_fixup_handle_s0
-       _asm_extable 18b, .L_fixup_handle_s0
-       _asm_extable 19b, .L_fixup_handle_s0
-       _asm_extable 20b, .L_fixup_handle_s2
-       _asm_extable 21b, .L_fixup_handle_s0
-       _asm_extable 22b, .L_fixup_handle_s0
-       _asm_extable 23b, .L_fixup_handle_s4
-       _asm_extable 24b, .L_fixup_handle_s0
-       _asm_extable 25b, .L_fixup_handle_s4
-       _asm_extable 26b, .L_fixup_handle_s0
-       _asm_extable 27b, .L_fixup_handle_s4
-       _asm_extable 28b, .L_fixup_handle_s0
+.Llarge_fixup:
+       sub.d   a1, a2, a0
+
+.Lsmall_fixup:
+29:    st.b    zero, a0, 0
+       addi.d  a0, a0, 1
+       addi.d  a1, a1, -1
+       bgt     a1, zero, 29b
+
+.Lexit:
+       move    a0, a1
+       jr      ra
+
+       _asm_extable 0b, .Lsmall_fixup
+       _asm_extable 1b, .Llarge_fixup
+       _asm_extable 2b, .Llarge_fixup
+       _asm_extable 3b, .Llarge_fixup
+       _asm_extable 4b, .Llarge_fixup
+       _asm_extable 5b, .Llarge_fixup
+       _asm_extable 6b, .Llarge_fixup
+       _asm_extable 7b, .Llarge_fixup
+       _asm_extable 8b, .Llarge_fixup
+       _asm_extable 9b, .Llarge_fixup
+       _asm_extable 10b, .Llarge_fixup
+       _asm_extable 11b, .Llarge_fixup
+       _asm_extable 12b, .Llarge_fixup
+       _asm_extable 13b, .Llarge_fixup
+       _asm_extable 14b, .Llarge_fixup
+       _asm_extable 15b, .Llarge_fixup
+       _asm_extable 16b, .Llarge_fixup
+       _asm_extable 17b, .Lexit
+       _asm_extable 18b, .Lsmall_fixup
+       _asm_extable 19b, .Lsmall_fixup
+       _asm_extable 20b, .Lsmall_fixup
+       _asm_extable 21b, .Lsmall_fixup
+       _asm_extable 22b, .Lsmall_fixup
+       _asm_extable 23b, .Lsmall_fixup
+       _asm_extable 24b, .Lsmall_fixup
+       _asm_extable 25b, .Lsmall_fixup
+       _asm_extable 26b, .Lsmall_fixup
+       _asm_extable 27b, .Lsmall_fixup
+       _asm_extable 28b, .Lsmall_fixup
+       _asm_extable 29b, .Lexit
 SYM_FUNC_END(__clear_user_fast)
index bfe3d27..feec3d3 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-       sub.d   a0, a2, a0
-       addi.d  a0, a0, (\to) * (-8)
-       jr      ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-       addi.d  a0, a2, -\to
-       jr      ra
-.endr
-
 SYM_FUNC_START(__copy_user)
        /*
         * Some CPUs support hardware unaligned access
@@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic)
 3:     move    a0, a2
        jr      ra
 
-       _asm_extable 1b, .L_fixup_handle_s0
-       _asm_extable 2b, .L_fixup_handle_s0
+       _asm_extable 1b, 3b
+       _asm_extable 2b, 3b
 SYM_FUNC_END(__copy_user_generic)
 
 /*
@@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast)
        sltui   t0, a2, 9
        bnez    t0, .Lsmall
 
-       add.d   a3, a1, a2
-       add.d   a2, a0, a2
 0:     ld.d    t0, a1, 0
 1:     st.d    t0, a0, 0
+       add.d   a3, a1, a2
+       add.d   a2, a0, a2
 
        /* align up destination address */
        andi    t1, a0, 7
@@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast)
 7:     ld.d    t5, a1, 40
 8:     ld.d    t6, a1, 48
 9:     ld.d    t7, a1, 56
-       addi.d  a1, a1, 64
 10:    st.d    t0, a0, 0
 11:    st.d    t1, a0, 8
 12:    st.d    t2, a0, 16
@@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast)
 15:    st.d    t5, a0, 40
 16:    st.d    t6, a0, 48
 17:    st.d    t7, a0, 56
+       addi.d  a1, a1, 64
        addi.d  a0, a0, 64
        bltu    a1, a4, .Lloop64
 
@@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast)
 19:    ld.d    t1, a1, 8
 20:    ld.d    t2, a1, 16
 21:    ld.d    t3, a1, 24
-       addi.d  a1, a1, 32
 22:    st.d    t0, a0, 0
 23:    st.d    t1, a0, 8
 24:    st.d    t2, a0, 16
 25:    st.d    t3, a0, 24
+       addi.d  a1, a1, 32
        addi.d  a0, a0, 32
 
 .Llt32:
@@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt16
 26:    ld.d    t0, a1, 0
 27:    ld.d    t1, a1, 8
-       addi.d  a1, a1, 16
 28:    st.d    t0, a0, 0
 29:    st.d    t1, a0, 8
+       addi.d  a1, a1, 16
        addi.d  a0, a0, 16
 
 .Llt16:
@@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt8
 30:    ld.d    t0, a1, 0
 31:    st.d    t0, a0, 0
+       addi.d  a1, a1, 8
        addi.d  a0, a0, 8
 
 .Llt8:
@@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast)
        jr      ra
 
        /* fixup and ex_table */
-       _asm_extable 0b, .L_fixup_handle_0
-       _asm_extable 1b, .L_fixup_handle_0
-       _asm_extable 2b, .L_fixup_handle_0
-       _asm_extable 3b, .L_fixup_handle_0
-       _asm_extable 4b, .L_fixup_handle_0
-       _asm_extable 5b, .L_fixup_handle_0
-       _asm_extable 6b, .L_fixup_handle_0
-       _asm_extable 7b, .L_fixup_handle_0
-       _asm_extable 8b, .L_fixup_handle_0
-       _asm_extable 9b, .L_fixup_handle_0
-       _asm_extable 10b, .L_fixup_handle_0
-       _asm_extable 11b, .L_fixup_handle_1
-       _asm_extable 12b, .L_fixup_handle_2
-       _asm_extable 13b, .L_fixup_handle_3
-       _asm_extable 14b, .L_fixup_handle_4
-       _asm_extable 15b, .L_fixup_handle_5
-       _asm_extable 16b, .L_fixup_handle_6
-       _asm_extable 17b, .L_fixup_handle_7
-       _asm_extable 18b, .L_fixup_handle_0
-       _asm_extable 19b, .L_fixup_handle_0
-       _asm_extable 20b, .L_fixup_handle_0
-       _asm_extable 21b, .L_fixup_handle_0
-       _asm_extable 22b, .L_fixup_handle_0
-       _asm_extable 23b, .L_fixup_handle_1
-       _asm_extable 24b, .L_fixup_handle_2
-       _asm_extable 25b, .L_fixup_handle_3
-       _asm_extable 26b, .L_fixup_handle_0
-       _asm_extable 27b, .L_fixup_handle_0
-       _asm_extable 28b, .L_fixup_handle_0
-       _asm_extable 29b, .L_fixup_handle_1
-       _asm_extable 30b, .L_fixup_handle_0
-       _asm_extable 31b, .L_fixup_handle_0
-       _asm_extable 32b, .L_fixup_handle_0
-       _asm_extable 33b, .L_fixup_handle_0
-       _asm_extable 34b, .L_fixup_handle_s0
-       _asm_extable 35b, .L_fixup_handle_s0
-       _asm_extable 36b, .L_fixup_handle_s0
-       _asm_extable 37b, .L_fixup_handle_s0
-       _asm_extable 38b, .L_fixup_handle_s0
-       _asm_extable 39b, .L_fixup_handle_s0
-       _asm_extable 40b, .L_fixup_handle_s0
-       _asm_extable 41b, .L_fixup_handle_s2
-       _asm_extable 42b, .L_fixup_handle_s0
-       _asm_extable 43b, .L_fixup_handle_s0
-       _asm_extable 44b, .L_fixup_handle_s0
-       _asm_extable 45b, .L_fixup_handle_s0
-       _asm_extable 46b, .L_fixup_handle_s0
-       _asm_extable 47b, .L_fixup_handle_s4
-       _asm_extable 48b, .L_fixup_handle_s0
-       _asm_extable 49b, .L_fixup_handle_s0
-       _asm_extable 50b, .L_fixup_handle_s0
-       _asm_extable 51b, .L_fixup_handle_s4
-       _asm_extable 52b, .L_fixup_handle_s0
-       _asm_extable 53b, .L_fixup_handle_s0
-       _asm_extable 54b, .L_fixup_handle_s0
-       _asm_extable 55b, .L_fixup_handle_s4
-       _asm_extable 56b, .L_fixup_handle_s0
-       _asm_extable 57b, .L_fixup_handle_s0
+.Llarge_fixup:
+       sub.d   a2, a2, a0
+
+.Lsmall_fixup:
+58:    ld.b    t0, a1, 0
+59:    st.b    t0, a0, 0
+       addi.d  a0, a0, 1
+       addi.d  a1, a1, 1
+       addi.d  a2, a2, -1
+       bgt     a2, zero, 58b
+
+.Lexit:
+       move    a0, a2
+       jr      ra
+
+       _asm_extable 0b, .Lsmall_fixup
+       _asm_extable 1b, .Lsmall_fixup
+       _asm_extable 2b, .Llarge_fixup
+       _asm_extable 3b, .Llarge_fixup
+       _asm_extable 4b, .Llarge_fixup
+       _asm_extable 5b, .Llarge_fixup
+       _asm_extable 6b, .Llarge_fixup
+       _asm_extable 7b, .Llarge_fixup
+       _asm_extable 8b, .Llarge_fixup
+       _asm_extable 9b, .Llarge_fixup
+       _asm_extable 10b, .Llarge_fixup
+       _asm_extable 11b, .Llarge_fixup
+       _asm_extable 12b, .Llarge_fixup
+       _asm_extable 13b, .Llarge_fixup
+       _asm_extable 14b, .Llarge_fixup
+       _asm_extable 15b, .Llarge_fixup
+       _asm_extable 16b, .Llarge_fixup
+       _asm_extable 17b, .Llarge_fixup
+       _asm_extable 18b, .Llarge_fixup
+       _asm_extable 19b, .Llarge_fixup
+       _asm_extable 20b, .Llarge_fixup
+       _asm_extable 21b, .Llarge_fixup
+       _asm_extable 22b, .Llarge_fixup
+       _asm_extable 23b, .Llarge_fixup
+       _asm_extable 24b, .Llarge_fixup
+       _asm_extable 25b, .Llarge_fixup
+       _asm_extable 26b, .Llarge_fixup
+       _asm_extable 27b, .Llarge_fixup
+       _asm_extable 28b, .Llarge_fixup
+       _asm_extable 29b, .Llarge_fixup
+       _asm_extable 30b, .Llarge_fixup
+       _asm_extable 31b, .Llarge_fixup
+       _asm_extable 32b, .Llarge_fixup
+       _asm_extable 33b, .Llarge_fixup
+       _asm_extable 34b, .Lexit
+       _asm_extable 35b, .Lexit
+       _asm_extable 36b, .Lsmall_fixup
+       _asm_extable 37b, .Lsmall_fixup
+       _asm_extable 38b, .Lsmall_fixup
+       _asm_extable 39b, .Lsmall_fixup
+       _asm_extable 40b, .Lsmall_fixup
+       _asm_extable 41b, .Lsmall_fixup
+       _asm_extable 42b, .Lsmall_fixup
+       _asm_extable 43b, .Lsmall_fixup
+       _asm_extable 44b, .Lsmall_fixup
+       _asm_extable 45b, .Lsmall_fixup
+       _asm_extable 46b, .Lsmall_fixup
+       _asm_extable 47b, .Lsmall_fixup
+       _asm_extable 48b, .Lsmall_fixup
+       _asm_extable 49b, .Lsmall_fixup
+       _asm_extable 50b, .Lsmall_fixup
+       _asm_extable 51b, .Lsmall_fixup
+       _asm_extable 52b, .Lsmall_fixup
+       _asm_extable 53b, .Lsmall_fixup
+       _asm_extable 54b, .Lsmall_fixup
+       _asm_extable 55b, .Lsmall_fixup
+       _asm_extable 56b, .Lsmall_fixup
+       _asm_extable 57b, .Lsmall_fixup
+       _asm_extable 58b, .Lexit
+       _asm_extable 59b, .Lexit
 SYM_FUNC_END(__copy_user_fast)
index cc30b3b..fa11488 100644 (file)
@@ -10,6 +10,8 @@
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memcpy)
        /*
         * Some CPUs support hardware unaligned access
@@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy)
        ALTERNATIVE     "b __memcpy_generic", \
                        "b __memcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memcpy)
-_ASM_NOKPROBE(memcpy)
+SYM_FUNC_ALIAS(__memcpy, memcpy)
 
 EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+_ASM_NOKPROBE(memcpy)
+_ASM_NOKPROBE(__memcpy)
 
 /*
  * void *__memcpy_generic(void *dst, const void *src, size_t n)
index 7dc76d1..82dae06 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memmove)
-       blt     a0, a1, memcpy  /* dst < src, memcpy */
-       blt     a1, a0, rmemcpy /* src < dst, rmemcpy */
-       jr      ra              /* dst == src, return */
+       blt     a0, a1, __memcpy        /* dst < src, memcpy */
+       blt     a1, a0, __rmemcpy       /* src < dst, rmemcpy */
+       jr      ra                      /* dst == src, return */
 SYM_FUNC_END(memmove)
-_ASM_NOKPROBE(memmove)
+SYM_FUNC_ALIAS(__memmove, memmove)
 
 EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL(__memmove)
+
+_ASM_NOKPROBE(memmove)
+_ASM_NOKPROBE(__memmove)
 
-SYM_FUNC_START(rmemcpy)
+SYM_FUNC_START(__rmemcpy)
        /*
         * Some CPUs support hardware unaligned access
         */
        ALTERNATIVE     "b __rmemcpy_generic", \
                        "b __rmemcpy_fast", CPU_FEATURE_UAL
-SYM_FUNC_END(rmemcpy)
-_ASM_NOKPROBE(rmemcpy)
+SYM_FUNC_END(__rmemcpy)
+_ASM_NOKPROBE(__rmemcpy)
 
 /*
  * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
index 3f20f79..06d3ca5 100644 (file)
@@ -16,6 +16,8 @@
        bstrins.d \r0, \r0, 63, 32
 .endm
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memset)
        /*
         * Some CPUs support hardware unaligned access
@@ -23,9 +25,13 @@ SYM_FUNC_START(memset)
        ALTERNATIVE     "b __memset_generic", \
                        "b __memset_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memset)
-_ASM_NOKPROBE(memset)
+SYM_FUNC_ALIAS(__memset, memset)
 
 EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
+
+_ASM_NOKPROBE(memset)
+_ASM_NOKPROBE(__memset)
 
 /*
  * void *__memset_generic(void *s, int c, size_t n)
diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c
new file mode 100644 (file)
index 0000000..84cd24b
--- /dev/null
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop. This is assuming all future
+ * popular LoongArch cores are similar performance-characteristics-wise to the
+ * current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX
+/* LSX flavor: LD/ST/XOR paste together "vld"/"vst"/"vxor.v" asm text on $vrN registers. */
+#define LD(reg, base, offset)  \
+       "vld $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)  \
+       "vst $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)     "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)    /* loads at offsets 0/16/32/48 from base into $vr0-$vr3 */ \
+       LD(0, base, 0)          \
+       LD(1, base, 16)         \
+       LD(2, base, 32)         \
+       LD(3, base, 48)
+
+#define LD_AND_XOR_LINE(base)  /* loads one line into $vr4-$vr7, then XORs it into $vr0-$vr3 */ \
+       LD(4, base, 0)          \
+       LD(5, base, 16)         \
+       LD(6, base, 32)         \
+       LD(7, base, 48)         \
+       XOR(0, 4)               \
+       XOR(1, 5)               \
+       XOR(2, 6)               \
+       XOR(3, 7)
+
+#define ST_LINE(base)          /* stores $vr0-$vr3 back to offsets 0/16/32/48 from base */ \
+       ST(0, base, 0)          \
+       ST(1, base, 16)         \
+       ST(2, base, 32)         \
+       ST(3, base, 48)
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
+#include "xor_template.c" /* instantiates __xor_lsx_2 .. __xor_lsx_5 from the template */
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+/* LASX flavor: LD/ST/XOR paste together "xvld"/"xvst"/"xvxor.v" asm text on $xrN registers. */
+#define LD(reg, base, offset)  \
+       "xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)  \
+       "xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)     "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)    /* loads at offsets 0/32 from base into $xr0-$xr1 */ \
+       LD(0, base, 0)          \
+       LD(1, base, 32)
+
+#define LD_AND_XOR_LINE(base)  /* loads one line into $xr2-$xr3, then XORs it into $xr0-$xr1 */ \
+       LD(2, base, 0)          \
+       LD(3, base, 32)         \
+       XOR(0, 2)               \
+       XOR(1, 3)
+
+#define ST_LINE(base)          /* stores $xr0-$xr1 back to offsets 0/32 from base */ \
+       ST(0, base, 0)          \
+       ST(1, base, 32)
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
+#include "xor_template.c" /* instantiates __xor_lasx_2 .. __xor_lasx_5 from the template */
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h
new file mode 100644 (file)
index 0000000..f50f325
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Simple interface to link xor_simd.c and xor_simd_glue.c
+ *
+ * Separating these files ensures that no SIMD instructions are run outside of
+ * the kfpu critical section.
+ */
+
+#ifndef __LOONGARCH_LIB_XOR_SIMD_H
+#define __LOONGARCH_LIB_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2);
+void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4);
+void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2);
+void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                 const unsigned long * __restrict p4);
+void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                 const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c
new file mode 100644 (file)
index 0000000..393f689
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/fpu.h>
+#include <asm/xor_simd.h>
+#include "xor_simd.h"
+
+#define MAKE_XOR_GLUE_2(flavor) /* xor_<flavor>_2(): __xor_<flavor>_2() run between kernel_fpu_begin()/_end() */ \
+void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_2(bytes, p1, p2);                                      \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_2)
+
+#define MAKE_XOR_GLUE_3(flavor) /* xor_<flavor>_3(): __xor_<flavor>_3() run between kernel_fpu_begin()/_end() */ \
+void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_3(bytes, p1, p2, p3);                                  \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_3)
+
+#define MAKE_XOR_GLUE_4(flavor) /* xor_<flavor>_4(): __xor_<flavor>_4() run between kernel_fpu_begin()/_end() */ \
+void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3,                      \
+                     const unsigned long * __restrict p4)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_4(bytes, p1, p2, p3, p4);                              \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_4)
+
+#define MAKE_XOR_GLUE_5(flavor) /* xor_<flavor>_5(): __xor_<flavor>_5() run between kernel_fpu_begin()/_end() */ \
+void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3,                      \
+                     const unsigned long * __restrict p4,                      \
+                     const unsigned long * __restrict p5)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5);                          \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_5)
+
+#define MAKE_XOR_GLUES(flavor)         /* expands all four glue wrappers (2..5 buffers) for one flavor */ \
+       MAKE_XOR_GLUE_2(flavor);        \
+       MAKE_XOR_GLUE_3(flavor);        \
+       MAKE_XOR_GLUE_4(flavor);        \
+       MAKE_XOR_GLUE_5(flavor)
+
+#ifdef CONFIG_CPU_HAS_LSX
+MAKE_XOR_GLUES(lsx);   /* defines and exports xor_lsx_2 .. xor_lsx_5 */
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+MAKE_XOR_GLUES(lasx);  /* defines and exports xor_lasx_2 .. xor_lasx_5 */
+#endif
diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c
new file mode 100644 (file)
index 0000000..0358ced
--- /dev/null
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Template for XOR operations, instantiated in xor_simd.c.
+ *
+ * Expected preprocessor definitions:
+ *
+ * - LINE_WIDTH
+ * - XOR_FUNC_NAME(nr)
+ * - LD_INOUT_LINE(buf)
+ * - LD_AND_XOR_LINE(buf)
+ * - ST_LINE(buf)
+ */
+
+void XOR_FUNC_NAME(2)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2)
+{
+       unsigned long lines = bytes / LINE_WIDTH;       /* whole lines; any remainder is ignored */
+
+       for (; lines > 0; lines--) {    /* v1 ^= v2; pre-tested so bytes < LINE_WIDTH is a no-op */
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
+               );
+               /* Advance both buffers by one LINE_WIDTH-byte line. */
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+       }
+}
+
+void XOR_FUNC_NAME(3)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3)
+{
+       unsigned long lines = bytes / LINE_WIDTH;       /* whole lines; any remainder is ignored */
+
+       for (; lines > 0; lines--) {    /* v1 ^= v2 ^ v3; pre-tested so bytes < LINE_WIDTH is a no-op */
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
+               );
+               /* Advance every buffer by one LINE_WIDTH-byte line. */
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+       }
+}
+
+void XOR_FUNC_NAME(4)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3,
+                     const unsigned long * __restrict v4)
+{
+       unsigned long lines = bytes / LINE_WIDTH;       /* whole lines; any remainder is ignored */
+
+       for (; lines > 0; lines--) {    /* v1 ^= v2 ^ v3 ^ v4; pre-tested so bytes < LINE_WIDTH is a no-op */
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       LD_AND_XOR_LINE(v4)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
+               : "memory"
+               );
+               /* Advance every buffer by one LINE_WIDTH-byte line. */
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+               v4 += LINE_WIDTH / sizeof(unsigned long);
+       }
+}
+
+void XOR_FUNC_NAME(5)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3,
+                     const unsigned long * __restrict v4,
+                     const unsigned long * __restrict v5)
+{
+       unsigned long lines = bytes / LINE_WIDTH;       /* whole lines; any remainder is ignored */
+
+       for (; lines > 0; lines--) {    /* v1 ^= v2 ^ v3 ^ v4 ^ v5; pre-tested so bytes < LINE_WIDTH is a no-op */
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       LD_AND_XOR_LINE(v4)
+                       LD_AND_XOR_LINE(v5)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
+                   [v5] "r"(v5) : "memory"
+               );
+               /* Advance every buffer by one LINE_WIDTH-byte line. */
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+               v4 += LINE_WIDTH / sizeof(unsigned long);
+               v5 += LINE_WIDTH / sizeof(unsigned long);
+       }
+}
index 8ffc638..e4d1e58 100644 (file)
@@ -7,3 +7,6 @@ obj-y                           += init.o cache.o tlb.o tlbex.o extable.o \
                                   fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
 
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
+obj-$(CONFIG_KASAN)            += kasan_init.o
+
+KASAN_SANITIZE_kasan_init.o     := n
index 72685a4..6be04d3 100644 (file)
@@ -156,7 +156,6 @@ void cpu_cache_init(void)
 
        current_cpu_data.cache_leaves_present = leaf;
        current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
-       shm_align_mask = PAGE_SIZE - 1;
 }
 
 static const pgprot_t protection_map[16] = {
index da5b6d5..e6376e3 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/kfence.h>
 
 #include <asm/branch.h>
 #include <asm/mmu_context.h>
@@ -30,7 +31,8 @@
 
 int show_unhandled_signals = 1;
 
-static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
+static void __kprobes no_context(struct pt_regs *regs,
+                       unsigned long write, unsigned long address)
 {
        const int field = sizeof(unsigned long) * 2;
 
@@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
        if (fixup_exception(regs))
                return;
 
+       if (kfence_handle_page_fault(address, write, regs))
+               return;
+
        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
@@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
        die("Oops", regs);
 }
 
-static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
+static void __kprobes do_out_of_memory(struct pt_regs *regs,
+                       unsigned long write, unsigned long address)
 {
        /*
         * We ran out of memory, call the OOM killer, and return the userspace
         * (which will retry the fault, or kill us if we got oom-killed).
         */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
        pagefault_out_of_memory();
@@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
 {
        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
 
@@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
 
        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
 
@@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
         */
        if (address & __UA_LIMIT) {
                if (!user_mode(regs))
-                       no_context(regs, address);
+                       no_context(regs, write, address);
                else
                        do_sigsegv(regs, write, address, si_code);
                return;
@@ -211,7 +217,7 @@ good_area:
 
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
-                       no_context(regs, address);
+                       no_context(regs, write, address);
                return;
        }
 
@@ -232,7 +238,7 @@ good_area:
        if (unlikely(fault & VM_FAULT_ERROR)) {
                mmap_read_unlock(mm);
                if (fault & VM_FAULT_OOM) {
-                       do_out_of_memory(regs, address);
+                       do_out_of_memory(regs, write, address);
                        return;
                } else if (fault & VM_FAULT_SIGSEGV) {
                        do_sigsegv(regs, write, address, si_code);
index 3b7d812..f3fe8c0 100644 (file)
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
-/*
- * We have up to 8 empty zeroed pages so we can map one of the right colour
- * when needed.         Since page is never written to after the initialization we
- * don't have to care about aliases on other CPUs.
- */
-unsigned long empty_zero_page, zero_page_mask;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(zero_page_mask);
-
-void setup_zero_pages(void)
-{
-       unsigned int order, i;
-       struct page *page;
-
-       order = 0;
-
-       empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-       if (!empty_zero_page)
-               panic("Oh boy, that early out of memory?");
-
-       page = virt_to_page((void *)empty_zero_page);
-       split_page(page, order);
-       for (i = 0; i < (1 << order); i++, page++)
-               mark_page_reserved(page);
-
-       zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
-}
 
 void copy_user_highpage(struct page *to, struct page *from,
        unsigned long vaddr, struct vm_area_struct *vma)
@@ -106,7 +81,6 @@ void __init mem_init(void)
        high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
        memblock_free_all();
-       setup_zero_pages();     /* Setup zeroed pages.  */
 }
 #endif /* !CONFIG_NUMA */
 
@@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al
 #endif
 #endif
 
-static pte_t *fixmap_pte(unsigned long addr)
+pte_t * __init populate_kernel_pte(unsigned long addr)
 {
-       pgd_t *pgd;
-       p4d_t *p4d;
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
        pud_t *pud;
        pmd_t *pmd;
 
-       pgd = pgd_offset_k(addr);
-       p4d = p4d_offset(pgd, addr);
-
-       if (pgd_none(*pgd)) {
-               pud_t *new __maybe_unused;
-
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pgd_populate(&init_mm, pgd, new);
+       if (p4d_none(*p4d)) {
+               pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pud)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               p4d_populate(&init_mm, p4d, pud);
 #ifndef __PAGETABLE_PUD_FOLDED
-               pud_init(new);
+               pud_init(pud);
 #endif
        }
 
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud)) {
-               pmd_t *new __maybe_unused;
-
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pud_populate(&init_mm, pud, new);
+               pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pmd)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               pud_populate(&init_mm, pud, pmd);
 #ifndef __PAGETABLE_PMD_FOLDED
-               pmd_init(new);
+               pmd_init(pmd);
 #endif
        }
 
        pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd)) {
-               pte_t *new __maybe_unused;
+       if (!pmd_present(*pmd)) {
+               pte_t *pte;
 
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pmd_populate_kernel(&init_mm, pmd, new);
+               pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pte)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               pmd_populate_kernel(&init_mm, pmd, pte);
        }
 
        return pte_offset_kernel(pmd, addr);
@@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx,
 
        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-       ptep = fixmap_pte(addr);
+       ptep = populate_kernel_pte(addr);
        if (!pte_none(*ptep)) {
                pte_ERROR(*ptep);
                return;
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
new file mode 100644 (file)
index 0000000..da68bc1
--- /dev/null
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm-generic/sections.h>
+
+static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define __p4d_none(early, p4d) (0)
+#else
+#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
+(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud)))
+#endif
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define __pud_none(early, pud) (0)
+#else
+#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
+(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd)))
+#endif
+
+#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
+(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte)))
+
+#define __pte_none(early, pte) (early ? pte_none(pte) : \
+((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page)))
+
+bool kasan_early_stage = true;
+
+/*
+ * Alloc memory for shadow memory page table.
+ */
+static phys_addr_t __init kasan_alloc_zeroed_page(int node)
+{
+       void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+                                       __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+       if (!p)
+               panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
+                       __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));
+
+       return __pa(p);
+}
+
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
+{
+       if (__pmd_none(early, READ_ONCE(*pmdp))) {
+               phys_addr_t pte_phys = early ?
+                               __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
+               pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+       }
+
+       return pte_offset_kernel(pmdp, addr);
+}
+
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
+{
+       if (__pud_none(early, READ_ONCE(*pudp))) {
+               phys_addr_t pmd_phys = early ?
+                               __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
+               pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
+       }
+
+       return pmd_offset(pudp, addr);
+}
+
+static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
+{
+       if (__p4d_none(early, READ_ONCE(*p4dp))) {
+               phys_addr_t pud_phys = early ?
+                       __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
+               p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
+       }
+
+       return pud_offset(p4dp, addr);
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+                                     unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
+
+       do {
+               phys_addr_t page_phys = early ?
+                                       __pa_symbol(kasan_early_shadow_page)
+                                             : kasan_alloc_zeroed_page(node);
+               next = addr + PAGE_SIZE;
+               set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+       } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
+}
+
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+                                     unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
+
+       do {
+               next = pmd_addr_end(addr, end);
+               kasan_pte_populate(pmdp, addr, next, node, early);
+       } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
+}
+
+static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
+                                           unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);
+
+       do {
+               next = pud_addr_end(addr, end);
+               kasan_pmd_populate(pudp, addr, next, node, early);
+       } while (pudp++, addr = next, addr != end);
+}
+
+static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
+                                           unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       p4d_t *p4dp = p4d_offset(pgdp, addr);
+
+       do {
+               next = p4d_addr_end(addr, end);
+               kasan_pud_populate(p4dp, addr, next, node, early);
+       } while (p4dp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+                                     int node, bool early)
+{
+       unsigned long next;
+       pgd_t *pgdp;
+
+       pgdp = pgd_offset_k(addr);
+
+       do {
+               next = pgd_addr_end(addr, end);
+               kasan_p4d_populate(pgdp, addr, next, node, early);
+       } while (pgdp++, addr = next, addr != end);
+
+}
+
+/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
+static void __init kasan_map_populate(unsigned long start, unsigned long end,
+                                     int node)
+{
+       kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+}
+
+static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
+{
+       WRITE_ONCE(*pgdp, pgdval);
+}
+
+static void __init clear_pgds(unsigned long start, unsigned long end)
+{
+       /*
+        * Remove references to kasan page tables from
+        * swapper_pg_dir. pgd_clear() can't be used
+        * here because it is a no-op on 2- and 3-level page table setups.
+        */
+       for (; start < end; start += PGDIR_SIZE)
+               kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
+}
+
+void __init kasan_init(void)
+{
+       u64 i;
+       phys_addr_t pa_start, pa_end;
+
+       /*
+        * PGD was populated as invalid_pmd_table or invalid_pud_table
+        * in pagetable_init() which depends on how many levels of page
+        * table you are using, but we have to clear the pgd entries of the
+        * kasan shadow memory, as their values are non-zero. Otherwise the
+        * pgd_none() check would be false and the populate that follows
+        * would not create any new pgd at all.
+        */
+       memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
+       csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
+       local_flush_tlb_all();
+
+       clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+       /* Maps everything to a single page of zeroes */
+       kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);
+
+       kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+                                       kasan_mem_to_shadow((void *)KFENCE_AREA_END));
+
+       kasan_early_stage = false;
+
+       /* Populate the linear mapping */
+       for_each_mem_range(i, &pa_start, &pa_end) {
+               void *start = (void *)phys_to_virt(pa_start);
+               void *end   = (void *)phys_to_virt(pa_end);
+
+               if (start >= end)
+                       break;
+
+               kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
+                       (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
+       }
+
+       /* Populate modules mapping */
+       kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
+               (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
+       /*
+        * KASAN may reuse the contents of kasan_early_shadow_pte directly, so we
+        * should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte(&kasan_early_shadow_pte[i],
+                       pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));
+
+       memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+       csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
+       local_flush_tlb_all();
+
+       /* At this point kasan is fully initialized. Enable error messages */
+       init_task.kasan_depth = 0;
+       pr_info("KernelAddressSanitizer initialized.\n");
+}
index fbe1a48..a9630a8 100644 (file)
@@ -8,12 +8,11 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 
-unsigned long shm_align_mask = PAGE_SIZE - 1;  /* Sane caches */
-EXPORT_SYMBOL(shm_align_mask);
+#define SHM_ALIGN_MASK (SHMLBA - 1)
 
-#define COLOUR_ALIGN(addr, pgoff)                              \
-       ((((addr) + shm_align_mask) & ~shm_align_mask) +        \
-        (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+#define COLOUR_ALIGN(addr, pgoff)                      \
+       ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK)  \
+        + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))
 
 enum mmap_allocation_direction {UP, DOWN};
 
@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
                 * cache aliasing constraints.
                 */
                if ((flags & MAP_SHARED) &&
-                   ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+                   ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
                        return -EINVAL;
                return addr;
        }
@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
        }
 
        info.length = len;
-       info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+       info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
        info.align_offset = pgoff << PAGE_SHIFT;
 
        if (dir == DOWN) {
index b14343e..71d0539 100644 (file)
@@ -9,6 +9,18 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+struct page *dmw_virt_to_page(unsigned long kaddr)
+{
+       return pfn_to_page(virt_to_pfn(kaddr));
+}
+EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+
+struct page *tlb_virt_to_page(unsigned long kaddr)
+{
+       return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+}
+EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
        pgd_t *init, *ret = NULL;
index a50308b..5c97d14 100644 (file)
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 # Objects to go into the VDSO.
 
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
 
diff --git a/arch/m68k/include/asm/ide.h b/arch/m68k/include/asm/ide.h
deleted file mode 100644 (file)
index 05cc7dc..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/* Copyright(c) 1996 Kars de Jong */
-/* Based on the ide driver from 1.2.13pl8 */
-
-/*
- * Credits (alphabetical):
- *
- *  - Bjoern Brauel
- *  - Kars de Jong
- *  - Torsten Ebeling
- *  - Dwight Engen
- *  - Thorsten Floeck
- *  - Roman Hodek
- *  - Guenther Kelleter
- *  - Chris Lawrence
- *  - Michael Rausch
- *  - Christian Sauer
- *  - Michael Schmitz
- *  - Jes Soerensen
- *  - Michael Thurm
- *  - Geert Uytterhoeven
- */
-
-#ifndef _M68K_IDE_H
-#define _M68K_IDE_H
-
-#ifdef __KERNEL__
-#include <asm/setup.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-
-#ifdef CONFIG_MMU
-
-/*
- * Get rid of defs from io.h - ide has its private and conflicting versions
- * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we
- * always use the `raw' MMIO versions
- */
-#undef readb
-#undef readw
-#undef writeb
-#undef writew
-
-#define readb                          in_8
-#define readw                          in_be16
-#define __ide_mm_insw(port, addr, n)   raw_insw((u16 *)port, addr, n)
-#define __ide_mm_insl(port, addr, n)   raw_insl((u32 *)port, addr, n)
-#define writeb(val, port)              out_8(port, val)
-#define writew(val, port)              out_be16(port, val)
-#define __ide_mm_outsw(port, addr, n)  raw_outsw((u16 *)port, addr, n)
-#define __ide_mm_outsl(port, addr, n)  raw_outsl((u32 *)port, addr, n)
-
-#else
-
-#define __ide_mm_insw(port, addr, n)   io_insw((unsigned int)port, addr, n)
-#define __ide_mm_insl(port, addr, n)   io_insl((unsigned int)port, addr, n)
-#define __ide_mm_outsw(port, addr, n)  io_outsw((unsigned int)port, addr, n)
-#define __ide_mm_outsl(port, addr, n)  io_outsl((unsigned int)port, addr, n)
-
-#endif /* CONFIG_MMU */
-
-#endif /* __KERNEL__ */
-#endif /* _M68K_IDE_H */
index 337f23e..86a4ce0 100644 (file)
@@ -99,9 +99,6 @@ extern int page_is_ram(unsigned long pfn);
 # define phys_to_pfn(phys)     (PFN_DOWN(phys))
 # define pfn_to_phys(pfn)      (PFN_PHYS(pfn))
 
-# define virt_to_pfn(vaddr)    (phys_to_pfn((__pa(vaddr))))
-# define pfn_to_virt(pfn)      __va(pfn_to_phys((pfn)))
-
 #  define virt_to_page(kaddr)  (pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
 #  define page_to_virt(page)   __va(page_to_pfn(page) << PAGE_SHIFT)
 #  define page_to_phys(page)     (page_to_pfn(page) << PAGE_SHIFT)
@@ -109,11 +106,6 @@ extern int page_is_ram(unsigned long pfn);
 #  define ARCH_PFN_OFFSET      (memory_start >> PAGE_SHIFT)
 # endif /* __ASSEMBLY__ */
 
-#define        virt_addr_valid(vaddr)  (pfn_valid(virt_to_pfn(vaddr)))
-
-# define __pa(x)       __virt_to_phys((unsigned long)(x))
-# define __va(x)       ((void *)__phys_to_virt((unsigned long)(x)))
-
 /* Convert between virtual and physical address for MMU. */
 /* Handle MicroBlaze processor with virtual memory. */
 #define __virt_to_phys(addr) \
@@ -125,6 +117,25 @@ extern int page_is_ram(unsigned long pfn);
 #define tovirt(rd, rs) \
        addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR)
 
+#ifndef __ASSEMBLY__
+
+# define __pa(x)       __virt_to_phys((unsigned long)(x))
+# define __va(x)       ((void *)__phys_to_virt((unsigned long)(x)))
+
+static inline unsigned long virt_to_pfn(const void *vaddr)
+{
+       return phys_to_pfn(__pa(vaddr));
+}
+
+static inline const void *pfn_to_virt(unsigned long pfn)
+{
+       return __va(pfn_to_phys((pfn)));
+}
+
+#define        virt_addr_valid(vaddr)  (pfn_valid(virt_to_pfn(vaddr)))
+
+#endif /* __ASSEMBLY__ */
+
 #define TOPHYS(addr)  __virt_to_phys(addr)
 
 #endif /* __KERNEL__ */
index 3657f5e..bf2600f 100644 (file)
@@ -25,7 +25,5 @@ void machine_shutdown(void);
 void machine_halt(void);
 void machine_power_off(void);
 
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
-
 # endif /* __ASSEMBLY__ */
 #endif /* _ASM_MICROBLAZE_SETUP_H */
index 5f47229..2f66c79 100644 (file)
@@ -9,7 +9,6 @@
 
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/of_platform.h>
 #include <linux/reboot.h>
 
 void machine_shutdown(void)
index 353fabd..3827dc7 100644 (file)
@@ -270,22 +270,6 @@ asmlinkage void __init mmu_init(void)
        memblock_dump_all();
 }
 
-void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-       void *p;
-
-       if (mem_init_done) {
-               p = kzalloc(size, mask);
-       } else {
-               p = memblock_alloc(size, SMP_CACHE_BYTES);
-               if (!p)
-                       panic("%s: Failed to allocate %zu bytes\n",
-                             __func__, size);
-       }
-
-       return p;
-}
-
 static const pgprot_t protection_map[16] = {
        [VM_NONE]                                       = PAGE_NONE,
        [VM_READ]                                       = PAGE_READONLY_X,
index a47593d..f49807e 100644 (file)
@@ -181,12 +181,16 @@ endif
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
 cflags-$(CONFIG_CPU_BMIPS)     += -march=mips32 -Wa,-mips32 -Wa,--trap
 
-cflags-$(CONFIG_CPU_LOONGSON2E) += $(call cc-option,-march=loongson2e) -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2F) += $(call cc-option,-march=loongson2f) -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-march=loongson3a,-march=mips64r2) -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E) += -march=loongson2e -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2F) += -march=loongson2f -Wa,--trap
 # Some -march= flags enable MMI instructions, and GCC complains about that
 # support being enabled alongside -msoft-float. Thus explicitly disable MMI.
 cflags-$(CONFIG_CPU_LOONGSON2EF) += $(call cc-option,-mno-loongson-mmi)
+ifdef CONFIG_CPU_LOONGSON64
+cflags-$(CONFIG_CPU_LOONGSON64)        += -Wa,--trap
+cflags-$(CONFIG_CC_IS_GCC) += -march=loongson3a
+cflags-$(CONFIG_CC_IS_CLANG) += -march=mips64r2
+endif
 cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-mno-loongson-mmi)
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,)
@@ -299,8 +303,8 @@ ifdef CONFIG_64BIT
     endif
   endif
 
-  ifeq ($(KBUILD_SYM32)$(call cc-option-yn,-msym32), yy)
-    cflags-y += -msym32 -DKBUILD_64BIT_SYM32
+  ifeq ($(KBUILD_SYM32), y)
+    cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
   else
     ifeq ($(CONFIG_CPU_DADDI_WORKAROUNDS), y)
       $(error CONFIG_CPU_DADDI_WORKAROUNDS unsupported without -msym32)
@@ -341,7 +345,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 KBUILD_LDFLAGS         += -m $(ld-emul)
 
-ifdef CONFIG_MIPS
+ifdef need-compiler
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
        grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
        sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
index 053805c..ec180ab 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/of.h>
 #include <linux/of_clk.h>
 #include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 #include <linux/libfdt.h>
 #include <linux/smp.h>
 #include <asm/addrspace.h>
index c8a8c6d..3395acd 100644 (file)
@@ -12,7 +12,8 @@
 #include <linux/semaphore.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/mtd/partitions.h>
 
 #include <asm/octeon/octeon.h>
index 25860fb..fef0c6d 100644 (file)
@@ -13,9 +13,9 @@
  * Mnemonic names for arguments to memcpy/__copy_user
  */
 
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define dst a0
index 235c77c..f76783c 100644 (file)
@@ -8,8 +8,10 @@
  */
 
 #include <linux/etherdevice.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
+#include <linux/platform_device.h>
 #include <linux/libfdt.h>
 
 #include <asm/octeon/octeon.h>
index 44821f4..dc49b09 100644 (file)
@@ -127,7 +127,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index 2b41331..07839a4 100644 (file)
@@ -283,6 +283,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
 CONFIG_DRM_AMD_ACP=y
 CONFIG_DRM_AMD_DC=y
 CONFIG_DRM_AMD_DC_SI=y
+CONFIG_DRM_AST=m
 CONFIG_DRM_RADEON=m
 CONFIG_DRM_QXL=y
 CONFIG_DRM_VIRTIO_GPU=y
index 7432090..ae1a779 100644 (file)
@@ -127,7 +127,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index dd2b9c1..c07e30f 100644 (file)
@@ -131,7 +131,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index 97c2d7f..0a57010 100644 (file)
@@ -128,7 +128,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index e0e312d..5c5e218 100644 (file)
@@ -90,7 +90,6 @@ CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
index dee1727..7ba67a0 100644 (file)
@@ -7,7 +7,6 @@ generated-y += unistd_nr_n32.h
 generated-y += unistd_nr_n64.h
 generated-y += unistd_nr_o32.h
 
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
index 04cedf9..54a85f1 100644 (file)
@@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-int kvm_arch_flush_remote_tlb(struct kvm *kvm);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
 
 #endif /* __MIPS_KVM_HOST_H__ */
index 7971272..84f4546 100644 (file)
@@ -45,8 +45,6 @@
 #define LS1X_NAND_BASE                 0x1fe78000
 #define LS1X_CLK_BASE                  0x1fe78030
 
-#include <regs-clk.h>
 #include <regs-mux.h>
-#include <regs-rtc.h>
 
 #endif /* __ASM_MACH_LOONGSON32_LOONGSON1_H */
diff --git a/arch/mips/include/asm/mach-loongson32/regs-clk.h b/arch/mips/include/asm/mach-loongson32/regs-clk.h
deleted file mode 100644 (file)
index 98136fa..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
- *
- * Loongson 1 Clock Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_CLK_H
-#define __ASM_MACH_LOONGSON32_REGS_CLK_H
-
-#define LS1X_CLK_REG(x) \
-               ((void __iomem *)KSEG1ADDR(LS1X_CLK_BASE + (x)))
-
-#define LS1X_CLK_PLL_FREQ              LS1X_CLK_REG(0x0)
-#define LS1X_CLK_PLL_DIV               LS1X_CLK_REG(0x4)
-
-#if defined(CONFIG_LOONGSON1_LS1B)
-/* Clock PLL Divisor Register Bits */
-#define DIV_DC_EN                      BIT(31)
-#define DIV_DC_RST                     BIT(30)
-#define DIV_CPU_EN                     BIT(25)
-#define DIV_CPU_RST                    BIT(24)
-#define DIV_DDR_EN                     BIT(19)
-#define DIV_DDR_RST                    BIT(18)
-#define RST_DC_EN                      BIT(5)
-#define RST_DC                         BIT(4)
-#define RST_DDR_EN                     BIT(3)
-#define RST_DDR                                BIT(2)
-#define RST_CPU_EN                     BIT(1)
-#define RST_CPU                                BIT(0)
-
-#define DIV_DC_SHIFT                   26
-#define DIV_CPU_SHIFT                  20
-#define DIV_DDR_SHIFT                  14
-
-#define DIV_DC_WIDTH                   4
-#define DIV_CPU_WIDTH                  4
-#define DIV_DDR_WIDTH                  4
-
-#define BYPASS_DC_SHIFT                        12
-#define BYPASS_DDR_SHIFT               10
-#define BYPASS_CPU_SHIFT               8
-
-#define BYPASS_DC_WIDTH                        1
-#define BYPASS_DDR_WIDTH               1
-#define BYPASS_CPU_WIDTH               1
-
-#elif defined(CONFIG_LOONGSON1_LS1C)
-/* PLL/SDRAM Frequency configuration register Bits */
-#define PLL_VALID                      BIT(31)
-#define FRAC_N                         GENMASK(23, 16)
-#define RST_TIME                       GENMASK(3, 2)
-#define SDRAM_DIV                      GENMASK(1, 0)
-
-/* CPU/CAMERA/DC Frequency configuration register Bits */
-#define DIV_DC_EN                      BIT(31)
-#define DIV_DC                         GENMASK(30, 24)
-#define DIV_CAM_EN                     BIT(23)
-#define DIV_CAM                                GENMASK(22, 16)
-#define DIV_CPU_EN                     BIT(15)
-#define DIV_CPU                                GENMASK(14, 8)
-#define DIV_DC_SEL_EN                  BIT(5)
-#define DIV_DC_SEL                     BIT(4)
-#define DIV_CAM_SEL_EN                 BIT(3)
-#define DIV_CAM_SEL                    BIT(2)
-#define DIV_CPU_SEL_EN                 BIT(1)
-#define DIV_CPU_SEL                    BIT(0)
-
-#define DIV_DC_SHIFT                   24
-#define DIV_CAM_SHIFT                  16
-#define DIV_CPU_SHIFT                  8
-#define DIV_DDR_SHIFT                  0
-
-#define DIV_DC_WIDTH                   7
-#define DIV_CAM_WIDTH                  7
-#define DIV_CPU_WIDTH                  7
-#define DIV_DDR_WIDTH                  2
-
-#endif
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_CLK_H */
diff --git a/arch/mips/include/asm/mach-loongson32/regs-rtc.h b/arch/mips/include/asm/mach-loongson32/regs-rtc.h
deleted file mode 100644 (file)
index a3d096b..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2016 Yang Ling <gnaygnil@gmail.com>
- *
- * Loongson 1 RTC timer Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_RTC_H
-#define __ASM_MACH_LOONGSON32_REGS_RTC_H
-
-#define LS1X_RTC_REG(x) \
-               ((void __iomem *)KSEG1ADDR(LS1X_RTC_BASE + (x)))
-
-#define LS1X_RTC_CTRL  LS1X_RTC_REG(0x40)
-
-#define RTC_EXTCLK_OK  (BIT(5) | BIT(8))
-#define RTC_EXTCLK_EN  BIT(8)
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_RTC_H */
index cff52b2..fcec579 100644 (file)
@@ -10,7 +10,7 @@
  * Author: Wu Zhangjin <wuzhangjin@gmail.com>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 #include <asm/ftrace.h>
index 9b7c8ab..447a3ea 100644 (file)
@@ -11,7 +11,6 @@
  *    written by Carsten Langgaard, carstenl@mips.com
  */
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/asm-offsets.h>
 #include <asm/mipsregs.h>
 #include <asm/regdef.h>
index 6c745aa..c000b22 100644 (file)
  * Further modifications to make this work:
  * Copyright (c) 1998 Harald Koerfgen
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
index 71b1aaf..48e6394 100644 (file)
@@ -13,7 +13,6 @@
  */
 #include <asm/asm.h>
 #include <asm/cachectl.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
index 4e8c985..4bb97ee 100644 (file)
  * Copyright (C) 2000 MIPS Technologies, Inc.
  * Copyright (C) 1999, 2001 Silicon Graphics, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 #include <asm/asm-offsets.h>
index aa5583a..231ac05 100644 (file)
@@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
        /* Flush slot from GPA */
        kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
                              slot->base_gfn + slot->npages - 1);
-       kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+       kvm_flush_remote_tlbs_memslot(kvm, slot);
        spin_unlock(&kvm->mmu_lock);
 }
 
@@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
                                        new->base_gfn + new->npages - 1);
                if (needs_flush)
-                       kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+                       kvm_flush_remote_tlbs_memslot(kvm, new);
                spin_unlock(&kvm->mmu_lock);
        }
 }
@@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 
 }
 
-int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        kvm_mips_callbacks->prepare_flush_shadow(kvm);
        return 1;
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 {
        int r;
index e8c0898..7b2ac13 100644 (file)
@@ -447,7 +447,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
        gpa_t gpa = range->start << PAGE_SHIFT;
-       pte_t hva_pte = range->pte;
+       pte_t hva_pte = range->arg.pte;
        pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
        pte_t old_pte;
 
index 20622bf..8f20800 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/sched.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 
index 9619996..4a808f8 100644 (file)
@@ -6,7 +6,8 @@
  */
 
 #include <linux/ioport.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 
 #include <lantiq_soc.h>
 
index a492b1e..8d52001 100644 (file)
@@ -8,8 +8,9 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 
 #include <lantiq_soc.h>
 #include "../clk.h"
index d444a1b..3ed0782 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/clkdev.h>
 #include <linux/spinlock.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/of_address.h>
 
 #include <lantiq_soc.h>
index 2796e87..37c1330 100644 (file)
@@ -7,7 +7,8 @@
 #include <linux/err.h>
 #include <linux/export.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 
 #include <lantiq_soc.h>
index 7767137..3d2ff41 100644 (file)
@@ -11,9 +11,9 @@
  * Copyright (C) 2014 Imagination Technologies Ltd.
  */
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #ifdef CONFIG_64BIT
index 18a43f2..a4b4e80 100644 (file)
@@ -32,9 +32,9 @@
 #undef CONFIG_CPU_HAS_PREFETCH
 #endif
 
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define dst a0
index 0b342ba..79405c3 100644 (file)
@@ -8,9 +8,9 @@
  * Copyright (C) 2007 by Maciej W. Rozycki
  * Copyright (C) 2011, 2012 MIPS Technologies, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #if LONGSIZE == 4
index 13aaa99..94f4203 100644 (file)
@@ -7,9 +7,9 @@
  * Copyright (C) 2011 MIPS Technologies, Inc.
  */
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define EX(insn,reg,addr,handler)                      \
index 6de31b6..c192a6f 100644 (file)
@@ -6,9 +6,9 @@
  * Copyright (c) 1996, 1998, 1999, 2004 by Ralf Baechle
  * Copyright (c) 1999 Silicon Graphics, Inc.
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define EX(insn,reg,addr,handler)                      \
index 64d7979..8075590 100644 (file)
@@ -265,14 +265,6 @@ struct platform_device ls1x_ehci_pdev = {
 };
 
 /* Real Time Clock */
-void __init ls1x_rtc_set_extclk(struct platform_device *pdev)
-{
-       u32 val = __raw_readl(LS1X_RTC_CTRL);
-
-       if (!(val & RTC_EXTCLK_OK))
-               __raw_writel(val | RTC_EXTCLK_EN, LS1X_RTC_CTRL);
-}
-
 struct platform_device ls1x_rtc_pdev = {
        .name           = "ls1x-rtc",
        .id             = -1,
index cdecd7a..e015a26 100644 (file)
@@ -187,181 +187,181 @@ static void csr_ipi_probe(void)
 
 static void ipi_set0_regs_init(void)
 {
-       ipi_set0_regs[0] = (void *)
+       ipi_set0_regs[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + SET0);
-       ipi_set0_regs[1] = (void *)
+       ipi_set0_regs[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + SET0);
-       ipi_set0_regs[2] = (void *)
+       ipi_set0_regs[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + SET0);
-       ipi_set0_regs[3] = (void *)
+       ipi_set0_regs[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + SET0);
-       ipi_set0_regs[4] = (void *)
+       ipi_set0_regs[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + SET0);
-       ipi_set0_regs[5] = (void *)
+       ipi_set0_regs[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + SET0);
-       ipi_set0_regs[6] = (void *)
+       ipi_set0_regs[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + SET0);
-       ipi_set0_regs[7] = (void *)
+       ipi_set0_regs[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + SET0);
-       ipi_set0_regs[8] = (void *)
+       ipi_set0_regs[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + SET0);
-       ipi_set0_regs[9] = (void *)
+       ipi_set0_regs[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + SET0);
-       ipi_set0_regs[10] = (void *)
+       ipi_set0_regs[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + SET0);
-       ipi_set0_regs[11] = (void *)
+       ipi_set0_regs[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + SET0);
-       ipi_set0_regs[12] = (void *)
+       ipi_set0_regs[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + SET0);
-       ipi_set0_regs[13] = (void *)
+       ipi_set0_regs[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + SET0);
-       ipi_set0_regs[14] = (void *)
+       ipi_set0_regs[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + SET0);
-       ipi_set0_regs[15] = (void *)
+       ipi_set0_regs[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + SET0);
 }
 
 static void ipi_clear0_regs_init(void)
 {
-       ipi_clear0_regs[0] = (void *)
+       ipi_clear0_regs[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + CLEAR0);
-       ipi_clear0_regs[1] = (void *)
+       ipi_clear0_regs[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + CLEAR0);
-       ipi_clear0_regs[2] = (void *)
+       ipi_clear0_regs[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + CLEAR0);
-       ipi_clear0_regs[3] = (void *)
+       ipi_clear0_regs[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + CLEAR0);
-       ipi_clear0_regs[4] = (void *)
+       ipi_clear0_regs[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + CLEAR0);
-       ipi_clear0_regs[5] = (void *)
+       ipi_clear0_regs[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + CLEAR0);
-       ipi_clear0_regs[6] = (void *)
+       ipi_clear0_regs[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + CLEAR0);
-       ipi_clear0_regs[7] = (void *)
+       ipi_clear0_regs[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + CLEAR0);
-       ipi_clear0_regs[8] = (void *)
+       ipi_clear0_regs[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + CLEAR0);
-       ipi_clear0_regs[9] = (void *)
+       ipi_clear0_regs[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + CLEAR0);
-       ipi_clear0_regs[10] = (void *)
+       ipi_clear0_regs[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + CLEAR0);
-       ipi_clear0_regs[11] = (void *)
+       ipi_clear0_regs[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + CLEAR0);
-       ipi_clear0_regs[12] = (void *)
+       ipi_clear0_regs[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + CLEAR0);
-       ipi_clear0_regs[13] = (void *)
+       ipi_clear0_regs[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + CLEAR0);
-       ipi_clear0_regs[14] = (void *)
+       ipi_clear0_regs[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + CLEAR0);
-       ipi_clear0_regs[15] = (void *)
+       ipi_clear0_regs[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + CLEAR0);
 }
 
 static void ipi_status0_regs_init(void)
 {
-       ipi_status0_regs[0] = (void *)
+       ipi_status0_regs[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + STATUS0);
-       ipi_status0_regs[1] = (void *)
+       ipi_status0_regs[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + STATUS0);
-       ipi_status0_regs[2] = (void *)
+       ipi_status0_regs[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + STATUS0);
-       ipi_status0_regs[3] = (void *)
+       ipi_status0_regs[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + STATUS0);
-       ipi_status0_regs[4] = (void *)
+       ipi_status0_regs[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + STATUS0);
-       ipi_status0_regs[5] = (void *)
+       ipi_status0_regs[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + STATUS0);
-       ipi_status0_regs[6] = (void *)
+       ipi_status0_regs[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + STATUS0);
-       ipi_status0_regs[7] = (void *)
+       ipi_status0_regs[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + STATUS0);
-       ipi_status0_regs[8] = (void *)
+       ipi_status0_regs[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + STATUS0);
-       ipi_status0_regs[9] = (void *)
+       ipi_status0_regs[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + STATUS0);
-       ipi_status0_regs[10] = (void *)
+       ipi_status0_regs[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + STATUS0);
-       ipi_status0_regs[11] = (void *)
+       ipi_status0_regs[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + STATUS0);
-       ipi_status0_regs[12] = (void *)
+       ipi_status0_regs[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + STATUS0);
-       ipi_status0_regs[13] = (void *)
+       ipi_status0_regs[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + STATUS0);
-       ipi_status0_regs[14] = (void *)
+       ipi_status0_regs[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + STATUS0);
-       ipi_status0_regs[15] = (void *)
+       ipi_status0_regs[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + STATUS0);
 }
 
 static void ipi_en0_regs_init(void)
 {
-       ipi_en0_regs[0] = (void *)
+       ipi_en0_regs[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + EN0);
-       ipi_en0_regs[1] = (void *)
+       ipi_en0_regs[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + EN0);
-       ipi_en0_regs[2] = (void *)
+       ipi_en0_regs[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + EN0);
-       ipi_en0_regs[3] = (void *)
+       ipi_en0_regs[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + EN0);
-       ipi_en0_regs[4] = (void *)
+       ipi_en0_regs[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + EN0);
-       ipi_en0_regs[5] = (void *)
+       ipi_en0_regs[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + EN0);
-       ipi_en0_regs[6] = (void *)
+       ipi_en0_regs[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + EN0);
-       ipi_en0_regs[7] = (void *)
+       ipi_en0_regs[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + EN0);
-       ipi_en0_regs[8] = (void *)
+       ipi_en0_regs[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + EN0);
-       ipi_en0_regs[9] = (void *)
+       ipi_en0_regs[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + EN0);
-       ipi_en0_regs[10] = (void *)
+       ipi_en0_regs[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + EN0);
-       ipi_en0_regs[11] = (void *)
+       ipi_en0_regs[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + EN0);
-       ipi_en0_regs[12] = (void *)
+       ipi_en0_regs[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + EN0);
-       ipi_en0_regs[13] = (void *)
+       ipi_en0_regs[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + EN0);
-       ipi_en0_regs[14] = (void *)
+       ipi_en0_regs[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + EN0);
-       ipi_en0_regs[15] = (void *)
+       ipi_en0_regs[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + EN0);
 }
 
 static void ipi_mailbox_buf_init(void)
 {
-       ipi_mailbox_buf[0] = (void *)
+       ipi_mailbox_buf[0] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + BUF);
-       ipi_mailbox_buf[1] = (void *)
+       ipi_mailbox_buf[1] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + BUF);
-       ipi_mailbox_buf[2] = (void *)
+       ipi_mailbox_buf[2] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + BUF);
-       ipi_mailbox_buf[3] = (void *)
+       ipi_mailbox_buf[3] = (void __iomem *)
                (SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + BUF);
-       ipi_mailbox_buf[4] = (void *)
+       ipi_mailbox_buf[4] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + BUF);
-       ipi_mailbox_buf[5] = (void *)
+       ipi_mailbox_buf[5] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + BUF);
-       ipi_mailbox_buf[6] = (void *)
+       ipi_mailbox_buf[6] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + BUF);
-       ipi_mailbox_buf[7] = (void *)
+       ipi_mailbox_buf[7] = (void __iomem *)
                (SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + BUF);
-       ipi_mailbox_buf[8] = (void *)
+       ipi_mailbox_buf[8] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + BUF);
-       ipi_mailbox_buf[9] = (void *)
+       ipi_mailbox_buf[9] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + BUF);
-       ipi_mailbox_buf[10] = (void *)
+       ipi_mailbox_buf[10] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + BUF);
-       ipi_mailbox_buf[11] = (void *)
+       ipi_mailbox_buf[11] = (void __iomem *)
                (SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + BUF);
-       ipi_mailbox_buf[12] = (void *)
+       ipi_mailbox_buf[12] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + BUF);
-       ipi_mailbox_buf[13] = (void *)
+       ipi_mailbox_buf[13] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + BUF);
-       ipi_mailbox_buf[14] = (void *)
+       ipi_mailbox_buf[14] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + BUF);
-       ipi_mailbox_buf[15] = (void *)
+       ipi_mailbox_buf[15] = (void __iomem *)
                (SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + BUF);
 }
 
index 43181ac..42d0516 100644 (file)
@@ -8,8 +8,8 @@
  * Copyright (C) 2012  MIPS Technologies, Inc.
  * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/export.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
index 00fef57..2705d7d 100644 (file)
@@ -11,8 +11,8 @@
  * Copyright (C) 2012  MIPS Technologies, Inc.
  * Copyright (C) 2012  Ralf Baechle <ralf@linux-mips.org>
  */
+#include <linux/export.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 #define FASTPATH_SIZE  128
index 79e29bf..80f7293 100644 (file)
@@ -13,9 +13,9 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/clk.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/of_pci.h>
+#include <linux/platform_device.h>
 
 #include <asm/addrspace.h>
 
index e9dd014..1cada09 100644 (file)
@@ -13,9 +13,8 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
-#include <linux/of_pci.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach-ralink/rt288x.h>
 
index f695320..73be568 100644 (file)
@@ -5,7 +5,7 @@
  */
 #include <linux/init.h>
 #include <linux/io.h>
-#include <linux/of_platform.h>
+#include <linux/spinlock.h>
 
 #include <asm/mach-pic32/pic32.h>
 
index f395ae2..25341b2 100644 (file)
@@ -5,8 +5,10 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 
 #include <asm/mach-ralink/ralink_regs.h>
 
index fa353bc..46aef0a 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <linux/io.h>
 #include <linux/bitops.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/irqdomain.h>
index 45d60c0..7f90068 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/of_fdt.h>
 #include <linux/kernel.h>
 #include <linux/memblock.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 
 #include <asm/reboot.h>
index aaac1e6..c3b9686 100644 (file)
@@ -7,8 +7,6 @@
  */
 
 #include <linux/string.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
 
 #include <asm/bootinfo.h>
 #include <asm/addrspace.h>
index e988455..5ae30b7 100644 (file)
@@ -51,6 +51,7 @@ int __init txx9_pci66_check(struct pci_controller *hose, int top_bus,
        unsigned short vid;
        int cap66 = -1;
        u16 stat;
+       int ret;
 
        /* It seems SLC90E66 needs some time after PCI reset... */
        mdelay(80);
@@ -60,9 +61,9 @@ int __init txx9_pci66_check(struct pci_controller *hose, int top_bus,
        for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) {
                if (PCI_FUNC(pci_devfn))
                        continue;
-               if (early_read_config_word(hose, top_bus, current_bus,
-                                          pci_devfn, PCI_VENDOR_ID, &vid) !=
-                   PCIBIOS_SUCCESSFUL)
+               ret = early_read_config_word(hose, top_bus, current_bus,
+                                            pci_devfn, PCI_VENDOR_ID, &vid);
+               if (ret != PCIBIOS_SUCCESSFUL)
                        continue;
                if (vid == 0xffff)
                        continue;
@@ -343,26 +344,28 @@ static void tc35815_fixup(struct pci_dev *dev)
 
 static void final_fixup(struct pci_dev *dev)
 {
+       unsigned long timeout;
        unsigned char bist;
+       int ret;
 
        /* Do build-in self test */
-       if (pci_read_config_byte(dev, PCI_BIST, &bist) == PCIBIOS_SUCCESSFUL &&
-           (bist & PCI_BIST_CAPABLE)) {
-               unsigned long timeout;
-               pci_set_power_state(dev, PCI_D0);
-               pr_info("PCI: %s BIST...", pci_name(dev));
-               pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START);
-               timeout = jiffies + HZ * 2;     /* timeout after 2 sec */
-               do {
-                       pci_read_config_byte(dev, PCI_BIST, &bist);
-                       if (time_after(jiffies, timeout))
-                               break;
-               } while (bist & PCI_BIST_START);
-               if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START))
-                       pr_cont("failed. (0x%x)\n", bist);
-               else
-                       pr_cont("OK.\n");
-       }
+       ret = pci_read_config_byte(dev, PCI_BIST, &bist);
+       if ((ret != PCIBIOS_SUCCESSFUL) || !(bist & PCI_BIST_CAPABLE))
+               return;
+
+       pci_set_power_state(dev, PCI_D0);
+       pr_info("PCI: %s BIST...", pci_name(dev));
+       pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START);
+       timeout = jiffies + HZ * 2;     /* timeout after 2 sec */
+       do {
+               pci_read_config_byte(dev, PCI_BIST, &bist);
+               if (time_after(jiffies, timeout))
+                       break;
+       } while (bist & PCI_BIST_START);
+       if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START))
+               pr_cont("failed. (0x%x)\n", bist);
+       else
+               pr_cont("OK.\n");
 }
 
 #ifdef CONFIG_TOSHIBA_FPCIB0
index d90b657..836465e 100644 (file)
@@ -94,7 +94,9 @@ VERSION
 #ifndef CONFIG_MIPS_DISABLE_VDSO
        global:
                __vdso_clock_gettime;
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
                __vdso_gettimeofday;
+#endif
                __vdso_clock_getres;
 #if _MIPS_SIM != _MIPS_SIM_ABI64
                __vdso_clock_gettime64;
diff --git a/arch/openrisc/include/asm/bug.h b/arch/openrisc/include/asm/bug.h
new file mode 100644 (file)
index 0000000..6d04776
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_OPENRISC_BUG_H
+#define __ASM_OPENRISC_BUG_H
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+
+void __noreturn die(const char *str, struct pt_regs *regs, long err);
+
+#endif /* __ASM_OPENRISC_BUG_H */
index 52b0d7e..44fc1fd 100644 (file)
@@ -72,8 +72,15 @@ typedef struct page *pgtable_t;
 #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
 #define __pa(x) ((unsigned long) (x) - PAGE_OFFSET)
 
-#define virt_to_pfn(kaddr)      (__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)        __va((pfn) << PAGE_SHIFT)
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+       return __pa(kaddr) >> PAGE_SHIFT;
+}
+
+static inline void *pfn_to_virt(unsigned long pfn)
+{
+       return __va(pfn << PAGE_SHIFT); /* shift the PFN first, then map */
+}
 
 #define virt_to_page(addr) \
        (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
index ed9efb4..3b736e7 100644 (file)
@@ -73,6 +73,7 @@ struct thread_struct {
 
 void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 unsigned long __get_wchan(struct task_struct *p);
+void show_registers(struct pt_regs *regs);
 
 #define cpu_relax()     barrier()
 
index dfa558f..86e0292 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #define __KERNEL_SYSCALLS__
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
@@ -38,6 +39,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/spr_defs.h>
+#include <asm/switch_to.h>
 
 #include <linux/smp.h>
 
@@ -119,8 +121,6 @@ void flush_thread(void)
 
 void show_regs(struct pt_regs *regs)
 {
-       extern void show_registers(struct pt_regs *regs);
-
        show_regs_print_info(KERN_DEFAULT);
        /* __PHX__ cleanup this mess */
        show_registers(regs);
index 0b7d2ca..1eeac3b 100644 (file)
 #include <asm/thread_info.h>
 #include <asm/page.h>
 
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+
 /*
  * Copy the thread state to a regset that can be interpreted by userspace.
  *
index 2e7257a..e2f21a5 100644 (file)
@@ -34,6 +34,11 @@ struct rt_sigframe {
        unsigned char retcode[16];      /* trampoline code */
 };
 
+asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs);
+
+asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+                              int syscall);
+
 static int restore_sigcontext(struct pt_regs *regs,
                              struct sigcontext __user *sc)
 {
@@ -224,7 +229,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
  * mode below.
  */
 
-int do_signal(struct pt_regs *regs, int syscall)
+static int do_signal(struct pt_regs *regs, int syscall)
 {
        struct ksignal ksig;
        unsigned long continue_addr = 0;
index 0a7a059..1c5a2d7 100644 (file)
@@ -23,6 +23,8 @@
 #include <asm/cacheflush.h>
 #include <asm/time.h>
 
+asmlinkage __init void secondary_start_kernel(void);
+
 static void (*smp_cross_call)(const struct cpumask *, unsigned int);
 
 unsigned long secondary_release = -1;
index 8e26c1a..764c7bf 100644 (file)
@@ -25,6 +25,8 @@
 #include <asm/cpuinfo.h>
 #include <asm/time.h>
 
+irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs);
+
 /* Test the timer ticks to count, used in sync routine */
 inline void openrisc_timer_set(unsigned long count)
 {
index 0aa6b07..9370888 100644 (file)
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 
+#include <asm/bug.h>
 #include <asm/io.h>
+#include <asm/processor.h>
 #include <asm/unwinder.h>
 #include <asm/sections.h>
 
-static int kstack_depth_to_print = 0x180;
 int lwa_flag;
 static unsigned long __user *lwa_addr;
 
+asmlinkage void unhandled_exception(struct pt_regs *regs, int ea, int vector);
+asmlinkage void do_trap(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_illegal_instruction(struct pt_regs *regs,
+                                      unsigned long address);
+
 static void print_trace(void *data, unsigned long addr, int reliable)
 {
        const char *loglvl = data;
@@ -143,80 +152,6 @@ bad:
        printk("\n");
 }
 
-void nommu_dump_state(struct pt_regs *regs,
-                     unsigned long ea, unsigned long vector)
-{
-       int i;
-       unsigned long addr, stack = regs->sp;
-
-       printk("\n\r[nommu_dump_state] :: ea %lx, vector %lx\n\r", ea, vector);
-
-       printk("CPU #: %d\n"
-              "   PC: %08lx    SR: %08lx    SP: %08lx\n",
-              0, regs->pc, regs->sr, regs->sp);
-       printk("GPR00: %08lx GPR01: %08lx GPR02: %08lx GPR03: %08lx\n",
-              0L, regs->gpr[1], regs->gpr[2], regs->gpr[3]);
-       printk("GPR04: %08lx GPR05: %08lx GPR06: %08lx GPR07: %08lx\n",
-              regs->gpr[4], regs->gpr[5], regs->gpr[6], regs->gpr[7]);
-       printk("GPR08: %08lx GPR09: %08lx GPR10: %08lx GPR11: %08lx\n",
-              regs->gpr[8], regs->gpr[9], regs->gpr[10], regs->gpr[11]);
-       printk("GPR12: %08lx GPR13: %08lx GPR14: %08lx GPR15: %08lx\n",
-              regs->gpr[12], regs->gpr[13], regs->gpr[14], regs->gpr[15]);
-       printk("GPR16: %08lx GPR17: %08lx GPR18: %08lx GPR19: %08lx\n",
-              regs->gpr[16], regs->gpr[17], regs->gpr[18], regs->gpr[19]);
-       printk("GPR20: %08lx GPR21: %08lx GPR22: %08lx GPR23: %08lx\n",
-              regs->gpr[20], regs->gpr[21], regs->gpr[22], regs->gpr[23]);
-       printk("GPR24: %08lx GPR25: %08lx GPR26: %08lx GPR27: %08lx\n",
-              regs->gpr[24], regs->gpr[25], regs->gpr[26], regs->gpr[27]);
-       printk("GPR28: %08lx GPR29: %08lx GPR30: %08lx GPR31: %08lx\n",
-              regs->gpr[28], regs->gpr[29], regs->gpr[30], regs->gpr[31]);
-       printk("  RES: %08lx oGPR11: %08lx\n",
-              regs->gpr[11], regs->orig_gpr11);
-
-       printk("Process %s (pid: %d, stackpage=%08lx)\n",
-              ((struct task_struct *)(__pa(current)))->comm,
-              ((struct task_struct *)(__pa(current)))->pid,
-              (unsigned long)current);
-
-       printk("\nStack: ");
-       printk("Stack dump [0x%08lx]:\n", (unsigned long)stack);
-       for (i = 0; i < kstack_depth_to_print; i++) {
-               if (((long)stack & (THREAD_SIZE - 1)) == 0)
-                       break;
-               stack++;
-
-               printk("%lx :: sp + %02d: 0x%08lx\n", stack, i * 4,
-                      *((unsigned long *)(__pa(stack))));
-       }
-       printk("\n");
-
-       printk("Call Trace:   ");
-       i = 1;
-       while (((long)stack & (THREAD_SIZE - 1)) != 0) {
-               addr = *((unsigned long *)__pa(stack));
-               stack++;
-
-               if (kernel_text_address(addr)) {
-                       if (i && ((i % 6) == 0))
-                               printk("\n ");
-                       printk(" [<%08lx>]", addr);
-                       i++;
-               }
-       }
-       printk("\n");
-
-       printk("\nCode: ");
-
-       for (i = -24; i < 24; i++) {
-               unsigned long word;
-
-               word = ((unsigned long *)(__pa(regs->pc)))[i];
-
-               print_data(regs->pc, word, i);
-       }
-       printk("\n");
-}
-
 /* This is normally the 'Oops' routine */
 void __noreturn die(const char *str, struct pt_regs *regs, long err)
 {
index a9dcd43..29e232d 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/perf_event.h>
 
 #include <linux/uaccess.h>
+#include <asm/bug.h>
 #include <asm/mmu_context.h>
 #include <asm/siginfo.h>
 #include <asm/signal.h>
@@ -30,7 +31,8 @@
  */
 volatile pgd_t *current_pgd[NR_CPUS];
 
-extern void __noreturn die(char *, struct pt_regs *, long);
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
+                             unsigned long vector, int write_acc);
 
 /*
  * This routine handles page faults.  It determines the address,
index d531ab8..1dcd78c 100644 (file)
@@ -123,8 +123,6 @@ static void __init map_ram(void)
 
 void __init paging_init(void)
 {
-       extern void tlb_init(void);
-
        int i;
 
        printk(KERN_INFO "Setting up paging and PTEs.\n");
index 91c8259..f59ea4c 100644 (file)
@@ -22,7 +22,7 @@
 
 extern int mem_init_done;
 
-/**
+/*
  * OK, this one's a bit tricky... ioremap can get called before memory is
  * initialized (early serial console does this) and will want to alloc a page
  * for its mapping.  No userspace pages will ever get allocated before memory
index e2f2a3c..3115f2e 100644 (file)
@@ -182,12 +182,3 @@ void destroy_context(struct mm_struct *mm)
        flush_tlb_mm(mm);
 
 }
-
-/* called once during VM initialization, from init.c */
-
-void __init tlb_init(void)
-{
-       /* Do nothing... */
-       /* invalidate the entire TLB */
-       /* flush_tlb_all(); */
-}
diff --git a/arch/parisc/include/asm/ide.h b/arch/parisc/include/asm/ide.h
deleted file mode 100644 (file)
index 7aa75b9..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  linux/include/asm-parisc/ide.h
- *
- *  Copyright (C) 1994-1996  Linus Torvalds & authors
- */
-
-/*
- *  This file contains the PARISC architecture specific IDE code.
- */
-
-#ifndef __ASM_PARISC_IDE_H
-#define __ASM_PARISC_IDE_H
-
-/* Generic I/O and MEMIO string operations.  */
-
-#define __ide_insw     insw
-#define __ide_insl     insl
-#define __ide_outsw    outsw
-#define __ide_outsl    outsl
-
-static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               *(u16 *)addr = __raw_readw(port);
-               addr += 2;
-       }
-}
-
-static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               *(u32 *)addr = __raw_readl(port);
-               addr += 4;
-       }
-}
-
-static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               __raw_writew(*(u16 *)addr, port);
-               addr += 2;
-       }
-}
-
-static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               __raw_writel(*(u32 *)addr, port);
-               addr += 4;
-       }
-}
-
-#endif /* __ASM_PARISC_IDE_H */
index 1c91a35..0d54e29 100644 (file)
@@ -1,3 +1,5 @@
+# Help: Common security options for PowerPC builds
+
 # This is the equivalent of booting with lockdown=integrity
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
@@ -12,4 +14,4 @@ CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
 
 # UBSAN bounds checking is very cheap and good for hardening
 CONFIG_UBSAN=y
-# CONFIG_UBSAN_MISC is not set
\ No newline at end of file
+# CONFIG_UBSAN_MISC is not set
diff --git a/arch/powerpc/include/asm/ide.h b/arch/powerpc/include/asm/ide.h
deleted file mode 100644 (file)
index ce87a44..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  Copyright (C) 1994-1996 Linus Torvalds & authors
- *
- *  This file contains the powerpc architecture specific IDE code.
- */
-#ifndef _ASM_POWERPC_IDE_H
-#define _ASM_POWERPC_IDE_H
-
-#include <linux/compiler.h>
-#include <asm/io.h>
-
-#define __ide_mm_insw(p, a, c) readsw((void __iomem *)(p), (a), (c))
-#define __ide_mm_insl(p, a, c) readsl((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsw(p, a, c)        writesw((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsl(p, a, c)        writesl((void __iomem *)(p), (a), (c))
-
-#endif /* _ASM_POWERPC_IDE_H */
index 5138dce..d607ab0 100644 (file)
@@ -273,7 +273,14 @@ config RISCV_DMA_NONCOHERENT
        select ARCH_HAS_SYNC_DMA_FOR_CPU
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE
        select DMA_BOUNCE_UNALIGNED_KMALLOC if SWIOTLB
-       select DMA_DIRECT_REMAP
+       select DMA_DIRECT_REMAP if MMU
+
+config RISCV_NONSTANDARD_CACHE_OPS
+       bool
+       depends on RISCV_DMA_NONCOHERENT
+       help
+         This enables function pointer support for non-standard noncoherent
+         systems to handle cache management.
 
 config AS_HAS_INSN
        def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero)
@@ -713,6 +720,25 @@ config RELOCATABLE
 
           If unsure, say N.
 
+config RANDOMIZE_BASE
+        bool "Randomize the address of the kernel image"
+        select RELOCATABLE
+        depends on MMU && 64BIT && !XIP_KERNEL
+        help
+          Randomizes the virtual address at which the kernel image is
+          loaded, as a security feature that deters exploit attempts
+          relying on knowledge of the location of kernel internals.
+
+          It is the bootloader's job to provide entropy, by passing a
+          random u64 value in /chosen/kaslr-seed at kernel entry.
+
+          When booting via the UEFI stub, it will invoke the firmware's
+          EFI_RNG_PROTOCOL implementation (if available) to supply entropy
+          to the kernel proper. In addition, it will randomise the physical
+          location of the kernel Image as well.
+
+          If unsure, say N.
+
 endmenu # "Kernel features"
 
 menu "Boot options"
index 0c8f465..566bcef 100644 (file)
@@ -1,5 +1,26 @@
 menu "CPU errata selection"
 
+config ERRATA_ANDES
+       bool "Andes AX45MP errata"
+       depends on RISCV_ALTERNATIVE && RISCV_SBI
+       help
+         All Andes errata Kconfig depend on this Kconfig. Disabling
+         this Kconfig will disable all Andes errata. Please say "Y"
+         here if your platform uses Andes CPU cores.
+
+         Otherwise, please say "N" here to avoid unnecessary overhead.
+
+config ERRATA_ANDES_CMO
+       bool "Apply Andes cache management errata"
+       depends on ERRATA_ANDES && ARCH_R9A07G043
+       select RISCV_DMA_NONCOHERENT
+       default y
+       help
+         This will apply the cache management errata to handle the
+         non-standard handling on non-coherent operations on Andes cores.
+
+         If you don't know what to do here, say "Y".
+
 config ERRATA_SIFIVE
        bool "SiFive errata"
        depends on RISCV_ALTERNATIVE
index f6af0f7..16ee163 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Build a 32-bit image
 CONFIG_ARCH_RV32I=y
 CONFIG_32BIT=y
 # CONFIG_PORTABLE is not set
index 313edc5..d872a2d 100644 (file)
@@ -1,2 +1,3 @@
+# Help: Build a 64-bit image
 CONFIG_ARCH_RV64I=y
 CONFIG_64BIT=y
index 7b2637c..8a27394 100644 (file)
@@ -2,5 +2,6 @@ ifdef CONFIG_RELOCATABLE
 KBUILD_CFLAGS += -fno-pie
 endif
 
+obj-$(CONFIG_ERRATA_ANDES) += andes/
 obj-$(CONFIG_ERRATA_SIFIVE) += sifive/
 obj-$(CONFIG_ERRATA_THEAD) += thead/
diff --git a/arch/riscv/errata/andes/Makefile b/arch/riscv/errata/andes/Makefile
new file mode 100644 (file)
index 0000000..2d644e1
--- /dev/null
@@ -0,0 +1 @@
+obj-y += errata.o
diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c
new file mode 100644 (file)
index 0000000..197db68
--- /dev/null
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Errata to be applied for Andes CPU cores
+ *
+ *  Copyright (C) 2023 Renesas Electronics Corporation.
+ *
+ * Author: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
+ */
+
+#include <linux/memory.h>
+#include <linux/module.h>
+
+#include <asm/alternative.h>
+#include <asm/cacheflush.h>
+#include <asm/errata_list.h>
+#include <asm/patch.h>
+#include <asm/processor.h>
+#include <asm/sbi.h>
+#include <asm/vendorid_list.h>
+
+#define ANDESTECH_AX45MP_MARCHID       0x8000000000008a45UL
+#define ANDESTECH_AX45MP_MIMPID                0x500UL
+#define ANDESTECH_SBI_EXT_ANDES                0x0900031E
+
+#define ANDES_SBI_EXT_IOCP_SW_WORKAROUND       1
+
+static long ax45mp_iocp_sw_workaround(void)
+{
+       /*
+        * Query the Andes vendor SBI extension. The
+        * ANDES_SBI_EXT_IOCP_SW_WORKAROUND call checks whether the IOCP is
+        * missing and the cache is controllable; only in that case is CMO
+        * applied to the platform.
+        */
+       struct sbiret res = sbi_ecall(ANDESTECH_SBI_EXT_ANDES,
+                                     ANDES_SBI_EXT_IOCP_SW_WORKAROUND,
+                                     0, 0, 0, 0, 0, 0);
+
+       /* An SBI error is reported to the caller as 0. */
+       if (res.error)
+               return 0;
+
+       return res.value;
+}
+
+/*
+ * Decide whether the IOCP software workaround applies to this CPU.
+ *
+ * Returns true only when CONFIG_ERRATA_ANDES_CMO is enabled, the CPU reports
+ * the AX45MP marchid/mimpid pair, and ax45mp_iocp_sw_workaround() returns
+ * non-zero. In that case riscv_cbom_block_size is set and
+ * riscv_noncoherent_supported() is called. @stage is not used.
+ */
+static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid)
+{
+       bool is_ax45mp = arch_id == ANDESTECH_AX45MP_MARCHID &&
+                        impid == ANDESTECH_AX45MP_MIMPID;
+
+       if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO) || !is_ax45mp)
+               return false;
+
+       if (!ax45mp_iocp_sw_workaround())
+               return false;
+
+       /* Set this just to make core cbo code happy */
+       riscv_cbom_block_size = 1;
+       riscv_noncoherent_supported();
+
+       return true;
+}
+
+void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
+                                             unsigned long archid, unsigned long impid,
+                                             unsigned int stage)
+{
+       errata_probe_iocp(stage, archid, impid); /* probe only; the result is ignored */
+
+       /*
+        * There is nothing to patch here at the moment, so @begin and @end
+        * are unused and we simply return.
+        */
+}
index be84b14..0554ed4 100644 (file)
@@ -120,11 +120,3 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
        if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
                local_flush_icache_all();
 }
-
-void thead_feature_probe_func(unsigned int cpu,
-                             unsigned long archid,
-                             unsigned long impid)
-{
-       if ((archid == 0) && (impid == 0))
-               per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
-}
index 6a41537..3c2b59b 100644 (file)
@@ -30,7 +30,6 @@
 #define ALT_OLD_PTR(a)                 __ALT_PTR(a, old_offset)
 #define ALT_ALT_PTR(a)                 __ALT_PTR(a, alt_offset)
 
-void probe_vendor_features(unsigned int cpu);
 void __init apply_boot_alternatives(void);
 void __init apply_early_boot_alternatives(void);
 void apply_module_alternatives(void *start, size_t length);
@@ -46,6 +45,9 @@ struct alt_entry {
        u32 patch_id;           /* The patch ID (erratum ID or cpufeature ID) */
 };
 
+void andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
+                            unsigned long archid, unsigned long impid,
+                            unsigned int stage);
 void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
                              unsigned long archid, unsigned long impid,
                              unsigned int stage);
@@ -53,15 +55,11 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
                             unsigned long archid, unsigned long impid,
                             unsigned int stage);
 
-void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
-                             unsigned long impid);
-
 void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
                                 unsigned int stage);
 
 #else /* CONFIG_RISCV_ALTERNATIVE */
 
-static inline void probe_vendor_features(unsigned int cpu) { }
 static inline void apply_boot_alternatives(void) { }
 static inline void apply_early_boot_alternatives(void) { }
 static inline void apply_module_alternatives(void *start, size_t length) { }
index 23fed53..d0345bd 100644 (file)
@@ -30,4 +30,6 @@ DECLARE_PER_CPU(long, misaligned_access_speed);
 /* Per-cpu ISA extensions. */
 extern struct riscv_isainfo hart_isa[NR_CPUS];
 
+void check_unaligned_access(int cpu);
+
 #endif
index 7bac43a..777cb82 100644 (file)
@@ -54,6 +54,7 @@
 #ifndef CONFIG_64BIT
 #define SATP_PPN       _AC(0x003FFFFF, UL)
 #define SATP_MODE_32   _AC(0x80000000, UL)
+#define SATP_MODE_SHIFT        31
 #define SATP_ASID_BITS 9
 #define SATP_ASID_SHIFT        22
 #define SATP_ASID_MASK _AC(0x1FF, UL)
@@ -62,6 +63,7 @@
 #define SATP_MODE_39   _AC(0x8000000000000000, UL)
 #define SATP_MODE_48   _AC(0x9000000000000000, UL)
 #define SATP_MODE_57   _AC(0xa000000000000000, UL)
+#define SATP_MODE_SHIFT        60
 #define SATP_ASID_BITS 16
 #define SATP_ASID_SHIFT        44
 #define SATP_ASID_MASK _AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/dma-noncoherent.h b/arch/riscv/include/asm/dma-noncoherent.h
new file mode 100644 (file)
index 0000000..312cfa0
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 Renesas Electronics Corp.
+ */
+
+#ifndef __ASM_DMA_NONCOHERENT_H
+#define __ASM_DMA_NONCOHERENT_H
+
+#include <linux/dma-direct.h>
+
+/*
+ * struct riscv_nonstd_cache_ops - Structure for non-standard CMO function pointers
+ *
+ * Every callback takes a physical address (paddr) and a size (size) and
+ * returns nothing.
+ *
+ * @wback: Function pointer for cache writeback
+ * @inv: Function pointer for invalidating cache
+ * @wback_inv: Function pointer for flushing the cache (writeback + invalidating)
+ */
+struct riscv_nonstd_cache_ops {
+       void (*wback)(phys_addr_t paddr, size_t size);
+       void (*inv)(phys_addr_t paddr, size_t size);
+       void (*wback_inv)(phys_addr_t paddr, size_t size);
+};
+
+extern struct riscv_nonstd_cache_ops noncoherent_cache_ops;
+
+void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops);
+
+#endif /* __ASM_DMA_NONCOHERENT_H */
index 8a6a128..46a3559 100644 (file)
@@ -45,4 +45,6 @@ void arch_efi_call_virt_teardown(void);
 
 unsigned long stext_offset(void);
 
+void efi_icache_sync(unsigned long start, unsigned long end);
+
 #endif /* _ASM_EFI_H */
index fb1a810..e2ecd01 100644 (file)
 #include <asm/hwcap.h>
 #include <asm/vendorid_list.h>
 
+#ifdef CONFIG_ERRATA_ANDES
+#define ERRATA_ANDESTECH_NO_IOCP       0
+#define ERRATA_ANDESTECH_NUMBER                1
+#endif
+
 #ifdef CONFIG_ERRATA_SIFIVE
 #define        ERRATA_SIFIVE_CIP_453 0
 #define        ERRATA_SIFIVE_CIP_1200 1
index 2d8ee53..1ebf20d 100644 (file)
@@ -337,6 +337,15 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
 
+void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu);
+unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
+                                   u64 __user *uindices);
+int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+                          const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+                          const struct kvm_one_reg *reg);
+
 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
index ff994fd..27f5bcc 100644 (file)
@@ -74,9 +74,7 @@ static inline void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
 #endif
 
 int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg,
-                                 unsigned long rtype);
+                                 const struct kvm_one_reg *reg);
 int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg,
-                                 unsigned long rtype);
+                                 const struct kvm_one_reg *reg);
 #endif
index b55ba20..5488ecc 100644 (file)
@@ -106,6 +106,7 @@ typedef struct page *pgtable_t;
 struct kernel_mapping {
        unsigned long page_offset;
        unsigned long virt_addr;
+       unsigned long virt_offset;
        uintptr_t phys_addr;
        uintptr_t size;
        /* Offset between linear mapping virtual address and kernel load address */
@@ -185,6 +186,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
 
 #define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
 
+unsigned long kaslr_offset(void);
+
 #endif /* __ASSEMBLY__ */
 
 #define virt_addr_valid(vaddr) ({                                              \
index 63c9883..e88b52d 100644 (file)
@@ -7,6 +7,7 @@
 #define _ASM_RISCV_PATCH_H
 
 int patch_text_nosync(void *addr, const void *insns, size_t len);
+int patch_text_set_nosync(void *addr, u8 c, size_t len);
 int patch_text(void *addr, u32 *insns, int ninsns);
 
 extern int riscv_patch_in_stop_machine;
index cb89af3..e55407a 100644 (file)
@@ -5,6 +5,7 @@
 #ifndef ASM_VENDOR_LIST_H
 #define ASM_VENDOR_LIST_H
 
+#define ANDESTECH_VENDOR_ID    0x31e
 #define SIFIVE_VENDOR_ID       0x489
 #define THEAD_VENDOR_ID                0x5b7
 
index 930fdc4..992c5e4 100644 (file)
@@ -55,6 +55,7 @@ struct kvm_riscv_config {
        unsigned long marchid;
        unsigned long mimpid;
        unsigned long zicboz_block_size;
+       unsigned long satp_mode;
 };
 
 /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -124,6 +125,12 @@ enum KVM_RISCV_ISA_EXT_ID {
        KVM_RISCV_ISA_EXT_SSAIA,
        KVM_RISCV_ISA_EXT_V,
        KVM_RISCV_ISA_EXT_SVNAPOT,
+       KVM_RISCV_ISA_EXT_ZBA,
+       KVM_RISCV_ISA_EXT_ZBS,
+       KVM_RISCV_ISA_EXT_ZICNTR,
+       KVM_RISCV_ISA_EXT_ZICSR,
+       KVM_RISCV_ISA_EXT_ZIFENCEI,
+       KVM_RISCV_ISA_EXT_ZIHPM,
        KVM_RISCV_ISA_EXT_MAX,
 };
 
@@ -193,6 +200,15 @@ enum KVM_RISCV_SBI_EXT_ID {
 
 /* ISA Extension registers are mapped as type 7 */
 #define KVM_REG_RISCV_ISA_EXT          (0x07 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_SINGLE       (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_EN     (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_DIS    (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id)  \
+               ((__ext_id) / __BITS_PER_LONG)
+#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id) \
+               (1UL << ((__ext_id) % __BITS_PER_LONG))
+#define KVM_REG_RISCV_ISA_MULTI_REG_LAST       \
+               KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1)
 
 /* SBI extension registers are mapped as type 8 */
 #define KVM_REG_RISCV_SBI_EXT          (0x08 << KVM_REG_RISCV_TYPE_SHIFT)
index 6d2d9af..a38268b 100644 (file)
@@ -108,13 +108,18 @@ struct __riscv_v_ext_state {
         * In signal handler, datap will be set a correct user stack offset
         * and vector registers will be copied to the address of datap
         * pointer.
-        *
-        * In ptrace syscall, datap will be set to zero and the vector
-        * registers will be copied to the address right after this
-        * structure.
         */
 };
 
+struct __riscv_v_regset_state {
+       unsigned long vstart;
+       unsigned long vl;
+       unsigned long vtype;
+       unsigned long vcsr;
+       unsigned long vlenb;
+       char vreg[];    /* flexible array member: data follows the fixed header above */
+};
+
 /*
  * According to spec: The number of bits in a single vector register,
  * VLEN >= ELEN, which must be a power of 2, and must be no greater than
index 6ac56af..95cf25d 100644 (file)
@@ -38,6 +38,7 @@ extra-y += vmlinux.lds
 obj-y  += head.o
 obj-y  += soc.o
 obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o
+obj-y  += copy-unaligned.o
 obj-y  += cpu.o
 obj-y  += cpufeature.o
 obj-y  += entry.o
index 6b75788..319a1da 100644 (file)
@@ -27,8 +27,6 @@ struct cpu_manufacturer_info_t {
        void (*patch_func)(struct alt_entry *begin, struct alt_entry *end,
                                  unsigned long archid, unsigned long impid,
                                  unsigned int stage);
-       void (*feature_probe_func)(unsigned int cpu, unsigned long archid,
-                                  unsigned long impid);
 };
 
 static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
@@ -43,8 +41,12 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info
        cpu_mfr_info->imp_id = sbi_get_mimpid();
 #endif
 
-       cpu_mfr_info->feature_probe_func = NULL;
        switch (cpu_mfr_info->vendor_id) {
+#ifdef CONFIG_ERRATA_ANDES
+       case ANDESTECH_VENDOR_ID:
+               cpu_mfr_info->patch_func = andes_errata_patch_func;
+               break;
+#endif
 #ifdef CONFIG_ERRATA_SIFIVE
        case SIFIVE_VENDOR_ID:
                cpu_mfr_info->patch_func = sifive_errata_patch_func;
@@ -53,7 +55,6 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info
 #ifdef CONFIG_ERRATA_THEAD
        case THEAD_VENDOR_ID:
                cpu_mfr_info->patch_func = thead_errata_patch_func;
-               cpu_mfr_info->feature_probe_func = thead_feature_probe_func;
                break;
 #endif
        default:
@@ -143,20 +144,6 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len,
        }
 }
 
-/* Called on each CPU as it starts */
-void probe_vendor_features(unsigned int cpu)
-{
-       struct cpu_manufacturer_info_t cpu_mfr_info;
-
-       riscv_fill_cpu_mfr_info(&cpu_mfr_info);
-       if (!cpu_mfr_info.feature_probe_func)
-               return;
-
-       cpu_mfr_info.feature_probe_func(cpu,
-                                       cpu_mfr_info.arch_id,
-                                       cpu_mfr_info.imp_id);
-}
-
 /*
  * This is called very early in the boot process (directly after we run
  * a feature detect on the boot CPU). No need to worry about other CPUs
@@ -211,7 +198,6 @@ void __init apply_boot_alternatives(void)
        /* If called on non-boot cpu things could go wrong */
        WARN_ON(smp_processor_id() != 0);
 
-       probe_vendor_features(0);
        _apply_alternatives((struct alt_entry *)__alt_start,
                            (struct alt_entry *)__alt_end,
                            RISCV_ALTERNATIVES_BOOT);
diff --git a/arch/riscv/kernel/copy-unaligned.S b/arch/riscv/kernel/copy-unaligned.S
new file mode 100644 (file)
index 0000000..cfdecfb
--- /dev/null
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023 Rivos Inc. */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+       .text
+
+/* void __riscv_copy_words_unaligned(void *, const void *, size_t) */
+/* Performs a memcpy without aligning buffers, using word loads and stores. */
+/* Note: The size is truncated to a multiple of 8 * SZREG; any remainder is not copied. */
+ENTRY(__riscv_copy_words_unaligned)
+       andi  a4, a2, ~((8*SZREG)-1)    /* a4 = size (a2) rounded down to a multiple of 8*SZREG */
+       beqz  a4, 2f                    /* nothing to do if the rounded size is zero */
+       add   a3, a1, a4                /* a3 = source (a1) + rounded size: loop end address */
+1:
+       REG_L a4,       0(a1)           /* load eight registers' worth from the source ... */
+       REG_L a5,   SZREG(a1)
+       REG_L a6, 2*SZREG(a1)
+       REG_L a7, 3*SZREG(a1)
+       REG_L t0, 4*SZREG(a1)
+       REG_L t1, 5*SZREG(a1)
+       REG_L t2, 6*SZREG(a1)
+       REG_L t3, 7*SZREG(a1)
+       REG_S a4,       0(a0)           /* ... then store them to the destination */
+       REG_S a5,   SZREG(a0)
+       REG_S a6, 2*SZREG(a0)
+       REG_S a7, 3*SZREG(a0)
+       REG_S t0, 4*SZREG(a0)
+       REG_S t1, 5*SZREG(a0)
+       REG_S t2, 6*SZREG(a0)
+       REG_S t3, 7*SZREG(a0)
+       addi  a0, a0, 8*SZREG           /* advance the destination pointer */
+       addi  a1, a1, 8*SZREG           /* advance the source pointer */
+       bltu  a1, a3, 1b                /* repeat until the source pointer reaches a3 */
+
+2:
+       ret
+END(__riscv_copy_words_unaligned)
+
+/* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */
+/* Performs a memcpy without aligning buffers, using only byte accesses. */
+/* Note: The size is truncated to a multiple of 8; any remainder is not copied. */
+ENTRY(__riscv_copy_bytes_unaligned)
+       andi a4, a2, ~(8-1)     /* a4 = size (a2) rounded down to a multiple of 8 */
+       beqz a4, 2f             /* nothing to do if the rounded size is zero */
+       add  a3, a1, a4         /* a3 = source (a1) + rounded size: loop end address */
+1:
+       lb   a4, 0(a1)          /* load eight bytes from the source, one at a time ... */
+       lb   a5, 1(a1)
+       lb   a6, 2(a1)
+       lb   a7, 3(a1)
+       lb   t0, 4(a1)
+       lb   t1, 5(a1)
+       lb   t2, 6(a1)
+       lb   t3, 7(a1)
+       sb   a4, 0(a0)          /* ... then store them to the destination */
+       sb   a5, 1(a0)
+       sb   a6, 2(a0)
+       sb   a7, 3(a0)
+       sb   t0, 4(a0)
+       sb   t1, 5(a0)
+       sb   t2, 6(a0)
+       sb   t3, 7(a0)
+       addi a0, a0, 8          /* advance the destination pointer */
+       addi a1, a1, 8          /* advance the source pointer */
+       bltu a1, a3, 1b         /* repeat until the source pointer reaches a3 */
+
+2:
+       ret
+END(__riscv_copy_bytes_unaligned)
diff --git a/arch/riscv/kernel/copy-unaligned.h b/arch/riscv/kernel/copy-unaligned.h
new file mode 100644 (file)
index 0000000..e3d70d3
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos, Inc.
+ */
+#ifndef __RISCV_KERNEL_COPY_UNALIGNED_H
+#define __RISCV_KERNEL_COPY_UNALIGNED_H
+
+#include <linux/types.h>
+
+void __riscv_copy_words_unaligned(void *dst, const void *src, size_t size);
+void __riscv_copy_bytes_unaligned(void *dst, const void *src, size_t size);
+
+#endif /* __RISCV_KERNEL_COPY_UNALIGNED_H */
index ef7b4fd..1cfbba6 100644 (file)
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
 #include <asm/hwcap.h>
+#include <asm/hwprobe.h>
 #include <asm/patch.h>
 #include <asm/processor.h>
 #include <asm/vector.h>
 
+#include "copy-unaligned.h"
+
 #define NUM_ALPHA_EXTS ('z' - 'a' + 1)
 
+#define MISALIGNED_ACCESS_JIFFIES_LG2 1
+#define MISALIGNED_BUFFER_SIZE 0x4000
+#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
+
 unsigned long elf_hwcap __read_mostly;
 
 /* Host ISA bitmap */
@@ -549,6 +556,103 @@ unsigned long riscv_get_elf_hwcap(void)
        return hwcap;
 }
 
+/*
+ * Compare the speed of unaligned word copies against byte copies and record
+ * the verdict in the per-CPU misaligned_access_speed slot of @cpu.
+ *
+ * Both copy routines are run repeatedly for a fixed number of jiffies with
+ * preemption disabled; the best (lowest) cycle count of each is kept. If the
+ * word copy is faster than the byte copy the result is
+ * RISCV_HWPROBE_MISALIGNED_FAST, otherwise RISCV_HWPROBE_MISALIGNED_SLOW.
+ *
+ * The per-CPU value is left untouched when the scratch buffer cannot be
+ * allocated or when either measurement comes out as zero cycles.
+ */
+void check_unaligned_access(int cpu)
+{
+       u64 start_cycles, end_cycles;
+       u64 word_cycles;
+       u64 byte_cycles;
+       int ratio;
+       unsigned long start_jiffies, now;
+       struct page *page;
+       void *dst;
+       void *src;
+       long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
+
+       page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
+       if (!page) {
+               /* Terminate the message so it is not merged with the next one. */
+               pr_warn("Can't alloc pages to measure memcpy performance\n");
+               return;
+       }
+
+       /* Make an unaligned destination buffer. */
+       dst = (void *)((unsigned long)page_address(page) | 0x1);
+       /* Unalign src as well, but differently (off by 1 + 2 = 3). */
+       src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+       src += 2;
+       word_cycles = -1ULL;
+       /* Do a warmup. */
+       __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+       preempt_disable();
+       /* Wait for a jiffy boundary so the timed window starts on a fresh tick. */
+       start_jiffies = jiffies;
+       while ((now = jiffies) == start_jiffies)
+               cpu_relax();
+
+       /*
+        * For a fixed amount of time, repeatedly try the function, and take
+        * the best time in cycles as the measurement.
+        */
+       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+               start_cycles = get_cycles64();
+               /* Ensure the CSR read can't reorder with respect to the copy. */
+               mb();
+               __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+               /* Ensure the copy ends before the end time is snapped. */
+               mb();
+               end_cycles = get_cycles64();
+               if ((end_cycles - start_cycles) < word_cycles)
+                       word_cycles = end_cycles - start_cycles;
+       }
+
+       /* Same procedure again for the byte-wise copy: warm up, align to a tick, measure. */
+       byte_cycles = -1ULL;
+       __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+       start_jiffies = jiffies;
+       while ((now = jiffies) == start_jiffies)
+               cpu_relax();
+
+       while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+               start_cycles = get_cycles64();
+               mb();
+               __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+               mb();
+               end_cycles = get_cycles64();
+               if ((end_cycles - start_cycles) < byte_cycles)
+                       byte_cycles = end_cycles - start_cycles;
+       }
+
+       preempt_enable();
+
+       /* Don't divide by zero. */
+       if (!word_cycles || !byte_cycles) {
+               pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
+                       cpu);
+
+               goto out;
+       }
+
+       if (word_cycles < byte_cycles)
+               speed = RISCV_HWPROBE_MISALIGNED_FAST;
+
+       /* Ratio scaled by 100 so it can be printed with two decimal places. */
+       ratio = div_u64((byte_cycles * 100), word_cycles);
+       pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
+               cpu,
+               ratio / 100,
+               ratio % 100,
+               (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
+
+       per_cpu(misaligned_access_speed, cpu) = speed;
+
+out:
+       __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
+}
+
+static int check_unaligned_access_boot_cpu(void)
+{
+       check_unaligned_access(0);      /* this initcall only measures CPU 0 */
+       return 0;                       /* always returns 0, whatever the measurement did */
+}
+
+arch_initcall(check_unaligned_access_boot_cpu);
+
 #ifdef CONFIG_RISCV_ALTERNATIVE
 /*
  * Alternative patch sites consider 48 bits when determining when to patch
index 1561615..ea1a103 100644 (file)
@@ -27,6 +27,7 @@ __efistub__start              = _start;
 __efistub__start_kernel                = _start_kernel;
 __efistub__end                 = _end;
 __efistub__edata               = _edata;
+__efistub___init_text_end      = __init_text_end;
 __efistub_screen_info          = screen_info;
 
 #endif
index 575e71d..13ee7bf 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/spinlock.h>
 #include <linux/mm.h>
 #include <linux/memory.h>
+#include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/stop_machine.h>
 #include <asm/kprobes.h>
@@ -53,13 +54,52 @@ static void patch_unmap(int fixmap)
 }
 NOKPROBE_SYMBOL(patch_unmap);
 
-static int patch_insn_write(void *addr, const void *insn, size_t len)
+static int __patch_insn_set(void *addr, u8 c, size_t len)
+{
+       void *waddr = addr;
+       bool across_pages = (((uintptr_t)addr & ~PAGE_MASK) + len) > PAGE_SIZE;
+
+       /*
+        * Only two pages can be mapped at a time for writing, so reject any
+        * range that would reach beyond the page following addr's page.
+        */
+       if (len + offset_in_page(addr) > 2 * PAGE_SIZE)
+               return -EINVAL;
+       /*
+        * The caller is expected to hold text_mutex already, so no further
+        * lock is taken here; holding it is what makes this safe across
+        * cores.
+        */
+       lockdep_assert_held(&text_mutex);
+
+       if (across_pages)
+               patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);    /* second page goes in the POKE1 slot */
+
+       waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+       memset(waddr, c, len);  /* fill through the address patch_map() returned, not through addr */
+
+       patch_unmap(FIX_TEXT_POKE0);
+
+       if (across_pages)
+               patch_unmap(FIX_TEXT_POKE1);
+
+       return 0;
+}
+NOKPROBE_SYMBOL(__patch_insn_set);
+
+static int __patch_insn_write(void *addr, const void *insn, size_t len)
 {
        void *waddr = addr;
        bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
        int ret;
 
        /*
+        * Only two pages can be mapped at a time for writing.
+        */
+       if (len + offset_in_page(addr) > 2 * PAGE_SIZE)
+               return -EINVAL;
+
+       /*
         * Before reaching here, it was expected to lock the text_mutex
         * already, so we don't need to give another lock here and could
         * ensure that it was safe between each cores.
@@ -74,7 +114,7 @@ static int patch_insn_write(void *addr, const void *insn, size_t len)
                lockdep_assert_held(&text_mutex);
 
        if (across_pages)
-               patch_map(addr + len, FIX_TEXT_POKE1);
+               patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1);
 
        waddr = patch_map(addr, FIX_TEXT_POKE0);
 
@@ -87,15 +127,79 @@ static int patch_insn_write(void *addr, const void *insn, size_t len)
 
        return ret;
 }
-NOKPROBE_SYMBOL(patch_insn_write);
+NOKPROBE_SYMBOL(__patch_insn_write);
 #else
-static int patch_insn_write(void *addr, const void *insn, size_t len)
+static int __patch_insn_set(void *addr, u8 c, size_t len)
+{
+       memset(addr, c, len);
+
+       return 0;
+}
+NOKPROBE_SYMBOL(__patch_insn_set);
+
+static int __patch_insn_write(void *addr, const void *insn, size_t len)
 {
        return copy_to_kernel_nofault(addr, insn, len);
 }
-NOKPROBE_SYMBOL(patch_insn_write);
+NOKPROBE_SYMBOL(__patch_insn_write);
 #endif /* CONFIG_MMU */
 
+/*
+ * Fill @len bytes at @addr with the byte @c, splitting the range into pieces
+ * that __patch_insn_set() accepts. Returns 0 on success or the first error
+ * returned by __patch_insn_set().
+ */
+static int patch_insn_set(void *addr, u8 c, size_t len)
+{
+       size_t done, chunk;
+       int ret = 0;
+
+       /*
+        * __patch_insn_set() can only work on 2 pages at a time, so each
+        * piece ends no later than the end of the page following the one it
+        * starts in.
+        */
+       for (done = 0; done < len; done += chunk) {
+               chunk = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + done),
+                             len - done);
+               ret = __patch_insn_set(addr + done, c, chunk);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+NOKPROBE_SYMBOL(patch_insn_set);
+
+/*
+ * Fill @len bytes of text at @addr with the byte @c and, on success, flush
+ * the instruction cache for that range. Returns 0 on success or the error
+ * from patch_insn_set().
+ */
+int patch_text_set_nosync(void *addr, u8 c, size_t len)
+{
+       uintptr_t start = (uintptr_t)addr;
+       int ret = patch_insn_set(addr, c, len);
+
+       if (ret)
+               return ret;
+
+       flush_icache_range(start, start + len);
+
+       return 0;
+}
+NOKPROBE_SYMBOL(patch_text_set_nosync);
+
+/*
+ * Copy @len bytes from @insn to @addr, splitting the range into pieces that
+ * __patch_insn_write() accepts. Returns 0 on success or the first error
+ * returned by __patch_insn_write().
+ */
+static int patch_insn_write(void *addr, const void *insn, size_t len)
+{
+       size_t done, chunk;
+       int ret = 0;
+
+       /*
+        * __patch_insn_write() can only handle len <= 2 * PAGE_SIZE, so each
+        * piece ends no later than the end of the page following the one it
+        * starts in.
+        */
+       for (done = 0; done < len; done += chunk) {
+               chunk = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(addr + done),
+                             len - done);
+               ret = __patch_insn_write(addr + done, insn + done, chunk);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+NOKPROBE_SYMBOL(patch_insn_write);
+
 int patch_text_nosync(void *addr, const void *insns, size_t len)
 {
        u32 *tp = addr;
index 7b593d4..07915dc 100644 (file)
@@ -35,5 +35,5 @@ $(obj)/string.o: $(srctree)/lib/string.c FORCE
 $(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
        $(call if_changed_rule,cc_o_c)
 
-obj-y          := cmdline_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+obj-y          := cmdline_early.pi.o fdt_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
 extra-y                := $(patsubst %.pi.o,%.o,$(obj-y))
index 05652d1..68e786c 100644 (file)
@@ -14,6 +14,7 @@ static char early_cmdline[COMMAND_LINE_SIZE];
  * LLVM complain because the function is actually unused in this file).
  */
 u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa);
+bool set_nokaslr_from_cmdline(uintptr_t dtb_pa);
 
 static char *get_early_cmdline(uintptr_t dtb_pa)
 {
@@ -60,3 +61,15 @@ u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa)
 
        return match_noXlvl(cmdline);
 }
+
+/*
+ * Return true if "nokaslr" appears on the command line as a parameter of its
+ * own, i.e. delimited by the start/end of the string or by spaces.
+ *
+ * A bare strstr() would also match when "nokaslr" is merely a substring of
+ * an unrelated parameter (e.g. "foo=nokaslrbar"), which would disable KASLR
+ * without the user having asked for it.
+ */
+static bool match_nokaslr(char *cmdline)
+{
+       const size_t optlen = sizeof("nokaslr") - 1;
+       char *p = cmdline;
+
+       while ((p = strstr(p, "nokaslr"))) {
+               char *end = p + optlen;
+
+               /* Accept only a whole token, not a fragment of a longer word. */
+               if ((p == cmdline || p[-1] == ' ') &&
+                   (*end == '\0' || *end == ' '))
+                       return true;
+
+               /* Keep scanning after this occurrence. */
+               p = end;
+       }
+
+       return false;
+}
+
+bool set_nokaslr_from_cmdline(uintptr_t dtb_pa)
+{
+       /* Fetch the early command line for this DTB and look for "nokaslr". */
+       return match_nokaslr(get_early_cmdline(dtb_pa));
+}
diff --git a/arch/riscv/kernel/pi/fdt_early.c b/arch/riscv/kernel/pi/fdt_early.c
new file mode 100644 (file)
index 0000000..899610e
--- /dev/null
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/libfdt.h>
+
+/*
+ * Declare the functions that are exported (but prefixed) here so that LLVM
+ * does not complain it lacks the 'static' keyword (which, if added, makes
+ * LLVM complain because the function is actually unused in this file).
+ */
+u64 get_kaslr_seed(uintptr_t dtb_pa);
+
+/*
+ * Read the /chosen/kaslr-seed property from the DTB at @dtb_pa.
+ *
+ * Returns the seed converted with fdt64_to_cpu(), or 0 when /chosen is not
+ * found, the property is absent, or its length is not sizeof(u64). The
+ * property is overwritten with zero in the DTB once it has been read.
+ */
+u64 get_kaslr_seed(uintptr_t dtb_pa)
+{
+       void *fdt = (void *)dtb_pa;
+       fdt64_t *seed_prop;
+       int chosen, prop_len;
+       u64 seed;
+
+       chosen = fdt_path_offset(fdt, "/chosen");
+       if (chosen < 0)
+               return 0;
+
+       seed_prop = fdt_getprop_w(fdt, chosen, "kaslr-seed", &prop_len);
+       if (!seed_prop || prop_len != sizeof(u64))
+               return 0;
+
+       seed = fdt64_to_cpu(*seed_prop);
+       *seed_prop = 0;
+
+       return seed;
+}
index 487303e..2afe460 100644 (file)
@@ -25,6 +25,9 @@ enum riscv_regset {
 #ifdef CONFIG_FPU
        REGSET_F,
 #endif
+#ifdef CONFIG_RISCV_ISA_V
+       REGSET_V,
+#endif
 };
 
 static int riscv_gpr_get(struct task_struct *target,
@@ -81,6 +84,71 @@ static int riscv_fpr_set(struct task_struct *target,
 }
 #endif
 
+#ifdef CONFIG_RISCV_ISA_V
+static int riscv_vr_get(struct task_struct *target,
+                       const struct user_regset *regset,
+                       struct membuf to)
+{
+       struct __riscv_v_ext_state *vstate = &target->thread.vstate;
+       struct __riscv_v_regset_state ptrace_vstate;
+
+       if (!riscv_v_vstate_query(task_pt_regs(target)))
+               return -EINVAL; /* the query on the target's pt_regs failed: nothing to report */
+
+       /*
+        * Ensure the vector registers have been saved to the memory before
+        * copying them to membuf. This is only done when the target is the
+        * current task.
+        */
+       if (target == current)
+               riscv_v_vstate_save(current, task_pt_regs(current));
+
+       ptrace_vstate.vstart = vstate->vstart;
+       ptrace_vstate.vl = vstate->vl;
+       ptrace_vstate.vtype = vstate->vtype;
+       ptrace_vstate.vcsr = vstate->vcsr;
+       ptrace_vstate.vlenb = vstate->vlenb;
+
+       /*
+        * Copy vector header from vstate. Only the fixed fields filled in
+        * above are written; datap itself is not part of the header.
+        */
+       membuf_write(&to, &ptrace_vstate, sizeof(struct __riscv_v_regset_state));
+
+       /*
+        * Copy all the vector registers from vstate: riscv_v_vsize bytes
+        * taken from datap, placed right after the header.
+        */
+       return membuf_write(&to, vstate->datap, riscv_v_vsize);
+}
+
+static int riscv_vr_set(struct task_struct *target,
+                       const struct user_regset *regset,
+                       unsigned int pos, unsigned int count,
+                       const void *kbuf, const void __user *ubuf)
+{
+       int ret;
+       struct __riscv_v_ext_state *vstate = &target->thread.vstate;
+       struct __riscv_v_regset_state ptrace_vstate;
+
+       if (!riscv_v_vstate_query(task_pt_regs(target)))
+               return -EINVAL;
+
+       /*
+        * Copy rest of the vstate except datap: the fixed header is read
+        * into a local copy first so that vlenb can be checked before
+        * anything in the target's vstate is modified.
+        *
+        * NOTE(review): ptrace_vstate is not initialised here; presumably
+        * callers always supply at least a full header, otherwise the fields
+        * used below would be indeterminate -- confirm against the regset
+        * core.
+        */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ptrace_vstate, 0,
+                                sizeof(struct __riscv_v_regset_state));
+       if (unlikely(ret))
+               return ret;
+
+       if (vstate->vlenb != ptrace_vstate.vlenb)       /* the supplied vlenb must match the target's */
+               return -EINVAL;
+
+       vstate->vstart = ptrace_vstate.vstart;
+       vstate->vl = ptrace_vstate.vl;
+       vstate->vtype = ptrace_vstate.vtype;
+       vstate->vcsr = ptrace_vstate.vcsr;
+
+       /*
+        * Copy all the vector registers. The position is reset so that the
+        * remaining input lands at offset 0 of datap.
+        */
+       pos = 0;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap,
+                                0, riscv_v_vsize);
+       return ret;
+}
+#endif
+
 static const struct user_regset riscv_user_regset[] = {
        [REGSET_X] = {
                .core_note_type = NT_PRSTATUS,
@@ -100,6 +168,17 @@ static const struct user_regset riscv_user_regset[] = {
                .set = riscv_fpr_set,
        },
 #endif
+#ifdef CONFIG_RISCV_ISA_V
+       [REGSET_V] = {
+               .core_note_type = NT_RISCV_VECTOR,
+               .align = 16,
+               .n = ((32 * RISCV_MAX_VLENB) +
+                     sizeof(struct __riscv_v_regset_state)) / sizeof(__u32),
+               .size = sizeof(__u32),
+               .regset_get = riscv_vr_get,
+               .set = riscv_vr_set,
+       },
+#endif
 };
 
 static const struct user_regset_view riscv_user_native_view = {
index 32c2e1e..e600aab 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/smp.h>
 #include <linux/efi.h>
 #include <linux/crash_dump.h>
+#include <linux/panic_notifier.h>
 
 #include <asm/acpi.h>
 #include <asm/alternative.h>
@@ -347,3 +348,27 @@ void free_initmem(void)
 
        free_initmem_default(POISON_FREE_INITMEM);
 }
+
+/*
+ * Notifier callback: log, at emergency level, the kernel's virtual offset
+ * together with the address the kernel was linked at.  None of the notifier
+ * arguments are used, and the callback always returns 0.
+ */
+static int dump_kernel_offset(struct notifier_block *self,
+                             unsigned long v, void *p)
+{
+       pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+                kernel_map.virt_offset, KERNEL_LINK_ADDR);
+
+       return 0;
+}
+
+/* Notifier block whose callback is dump_kernel_offset(). */
+static struct notifier_block kernel_offset_notifier = {
+       .notifier_call = dump_kernel_offset,
+};
+
+/*
+ * Initcall: add kernel_offset_notifier to the panic notifier list, but only
+ * when CONFIG_RANDOMIZE_BASE is enabled.  Always returns 0.
+ */
+static int __init register_kernel_offset_dumper(void)
+{
+       /* Nothing to register unless the kernel base can be randomized. */
+       if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+               return 0;
+
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &kernel_offset_notifier);
+       return 0;
+}
+device_initcall(register_kernel_offset_dumper);
index f4d6acb..1b8da4e 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/sched/task_stack.h>
 #include <linux/sched/mm.h>
 #include <asm/cpu_ops.h>
+#include <asm/cpufeature.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
@@ -245,7 +246,7 @@ asmlinkage __visible void smp_callin(void)
 
        numa_add_cpu(curr_cpuid);
        set_cpu_online(curr_cpuid, 1);
-       probe_vendor_features(curr_cpuid);
+       check_unaligned_access(curr_cpuid);
 
        if (has_vector()) {
                if (riscv_v_setup_vsize())
index fee0671..4c2067f 100644 (file)
@@ -19,6 +19,7 @@ kvm-y += vcpu_exit.o
 kvm-y += vcpu_fp.o
 kvm-y += vcpu_vector.o
 kvm-y += vcpu_insn.o
+kvm-y += vcpu_onereg.o
 kvm-y += vcpu_switch.o
 kvm-y += vcpu_sbi.o
 kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
index 585a3b4..74bb274 100644 (file)
@@ -176,7 +176,7 @@ int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu,
        struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
 
        if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
-               return -EINVAL;
+               return -ENOENT;
 
        *out_val = 0;
        if (kvm_riscv_aia_available())
@@ -192,7 +192,7 @@ int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu,
        struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
 
        if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
-               return -EINVAL;
+               return -ENOENT;
 
        if (kvm_riscv_aia_available()) {
                ((unsigned long *)csr)[reg_num] = val;
index f2eb479..068c745 100644 (file)
@@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
 {
 }
@@ -559,7 +553,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
        int ret;
-       kvm_pfn_t pfn = pte_pfn(range->pte);
+       kvm_pfn_t pfn = pte_pfn(range->arg.pte);
 
        if (!kvm->arch.pgd)
                return false;
index d12ef99..82229db 100644 (file)
 #include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
-#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/signal.h>
 #include <linux/fs.h>
 #include <linux/kvm_host.h>
 #include <asm/csr.h>
 #include <asm/cacheflush.h>
-#include <asm/hwcap.h>
-#include <asm/sbi.h>
-#include <asm/vector.h>
 #include <asm/kvm_vcpu_vector.h>
 
 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
@@ -46,79 +42,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
                       sizeof(kvm_vcpu_stats_desc),
 };
 
-#define KVM_RISCV_BASE_ISA_MASK                GENMASK(25, 0)
-
-#define KVM_ISA_EXT_ARR(ext)           [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
-
-/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
-static const unsigned long kvm_isa_ext_arr[] = {
-       [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
-       [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
-       [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
-       [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
-       [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
-       [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
-       [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
-       [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
-
-       KVM_ISA_EXT_ARR(SSAIA),
-       KVM_ISA_EXT_ARR(SSTC),
-       KVM_ISA_EXT_ARR(SVINVAL),
-       KVM_ISA_EXT_ARR(SVNAPOT),
-       KVM_ISA_EXT_ARR(SVPBMT),
-       KVM_ISA_EXT_ARR(ZBB),
-       KVM_ISA_EXT_ARR(ZIHINTPAUSE),
-       KVM_ISA_EXT_ARR(ZICBOM),
-       KVM_ISA_EXT_ARR(ZICBOZ),
-};
-
-static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
-{
-       unsigned long i;
-
-       for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
-               if (kvm_isa_ext_arr[i] == base_ext)
-                       return i;
-       }
-
-       return KVM_RISCV_ISA_EXT_MAX;
-}
-
-static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
-{
-       switch (ext) {
-       case KVM_RISCV_ISA_EXT_H:
-               return false;
-       case KVM_RISCV_ISA_EXT_V:
-               return riscv_v_vstate_ctrl_user_allowed();
-       default:
-               break;
-       }
-
-       return true;
-}
-
-static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
-{
-       switch (ext) {
-       case KVM_RISCV_ISA_EXT_A:
-       case KVM_RISCV_ISA_EXT_C:
-       case KVM_RISCV_ISA_EXT_I:
-       case KVM_RISCV_ISA_EXT_M:
-       case KVM_RISCV_ISA_EXT_SSAIA:
-       case KVM_RISCV_ISA_EXT_SSTC:
-       case KVM_RISCV_ISA_EXT_SVINVAL:
-       case KVM_RISCV_ISA_EXT_SVNAPOT:
-       case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
-       case KVM_RISCV_ISA_EXT_ZBB:
-               return false;
-       default:
-               break;
-       }
-
-       return true;
-}
-
 static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
 {
        struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
@@ -176,7 +99,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        int rc;
        struct kvm_cpu_context *cntx;
        struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
-       unsigned long host_isa, i;
 
        /* Mark this VCPU never ran */
        vcpu->arch.ran_atleast_once = false;
@@ -184,12 +106,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
 
        /* Setup ISA features available to VCPU */
-       for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
-               host_isa = kvm_isa_ext_arr[i];
-               if (__riscv_isa_extension_available(NULL, host_isa) &&
-                   kvm_riscv_vcpu_isa_enable_allowed(i))
-                       set_bit(host_isa, vcpu->arch.isa);
-       }
+       kvm_riscv_vcpu_setup_isa(vcpu);
 
        /* Setup vendor, arch, and implementation details */
        vcpu->arch.mvendorid = sbi_get_mvendorid();
@@ -294,450 +211,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
-static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
-                                        const struct kvm_one_reg *reg)
-{
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CONFIG);
-       unsigned long reg_val;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       switch (reg_num) {
-       case KVM_REG_RISCV_CONFIG_REG(isa):
-               reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
-               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
-                       return -EINVAL;
-               reg_val = riscv_cbom_block_size;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
-               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
-                       return -EINVAL;
-               reg_val = riscv_cboz_block_size;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(mvendorid):
-               reg_val = vcpu->arch.mvendorid;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(marchid):
-               reg_val = vcpu->arch.marchid;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(mimpid):
-               reg_val = vcpu->arch.mimpid;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
-                                        const struct kvm_one_reg *reg)
-{
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CONFIG);
-       unsigned long i, isa_ext, reg_val;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       switch (reg_num) {
-       case KVM_REG_RISCV_CONFIG_REG(isa):
-               /*
-                * This ONE REG interface is only defined for
-                * single letter extensions.
-                */
-               if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
-                       return -EINVAL;
-
-               if (!vcpu->arch.ran_atleast_once) {
-                       /* Ignore the enable/disable request for certain extensions */
-                       for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
-                               isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
-                               if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
-                                       reg_val &= ~BIT(i);
-                                       continue;
-                               }
-                               if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
-                                       if (reg_val & BIT(i))
-                                               reg_val &= ~BIT(i);
-                               if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
-                                       if (!(reg_val & BIT(i)))
-                                               reg_val |= BIT(i);
-                       }
-                       reg_val &= riscv_isa_extension_base(NULL);
-                       /* Do not modify anything beyond single letter extensions */
-                       reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
-                                 (reg_val & KVM_RISCV_BASE_ISA_MASK);
-                       vcpu->arch.isa[0] = reg_val;
-                       kvm_riscv_vcpu_fp_reset(vcpu);
-               } else {
-                       return -EOPNOTSUPP;
-               }
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
-               return -EOPNOTSUPP;
-       case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
-               return -EOPNOTSUPP;
-       case KVM_REG_RISCV_CONFIG_REG(mvendorid):
-               if (!vcpu->arch.ran_atleast_once)
-                       vcpu->arch.mvendorid = reg_val;
-               else
-                       return -EBUSY;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(marchid):
-               if (!vcpu->arch.ran_atleast_once)
-                       vcpu->arch.marchid = reg_val;
-               else
-                       return -EBUSY;
-               break;
-       case KVM_REG_RISCV_CONFIG_REG(mimpid):
-               if (!vcpu->arch.ran_atleast_once)
-                       vcpu->arch.mimpid = reg_val;
-               else
-                       return -EBUSY;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
-                                      const struct kvm_one_reg *reg)
-{
-       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CORE);
-       unsigned long reg_val;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-       if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
-               reg_val = cntx->sepc;
-       else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
-                reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
-               reg_val = ((unsigned long *)cntx)[reg_num];
-       else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
-               reg_val = (cntx->sstatus & SR_SPP) ?
-                               KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
-       else
-               return -EINVAL;
-
-       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
-                                      const struct kvm_one_reg *reg)
-{
-       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CORE);
-       unsigned long reg_val;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-       if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
-               return -EINVAL;
-
-       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
-               cntx->sepc = reg_val;
-       else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
-                reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
-               ((unsigned long *)cntx)[reg_num] = reg_val;
-       else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
-               if (reg_val == KVM_RISCV_MODE_S)
-                       cntx->sstatus |= SR_SPP;
-               else
-                       cntx->sstatus &= ~SR_SPP;
-       } else
-               return -EINVAL;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
-                                         unsigned long reg_num,
-                                         unsigned long *out_val)
-{
-       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-
-       if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
-               kvm_riscv_vcpu_flush_interrupts(vcpu);
-               *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
-               *out_val |= csr->hvip & ~IRQ_LOCAL_MASK;
-       } else
-               *out_val = ((unsigned long *)csr)[reg_num];
-
-       return 0;
-}
-
-static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
-                                                unsigned long reg_num,
-                                                unsigned long reg_val)
-{
-       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-
-       if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
-               reg_val &= VSIP_VALID_MASK;
-               reg_val <<= VSIP_TO_HVIP_SHIFT;
-       }
-
-       ((unsigned long *)csr)[reg_num] = reg_val;
-
-       if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
-               WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
-                                     const struct kvm_one_reg *reg)
-{
-       int rc;
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CSR);
-       unsigned long reg_val, reg_subtype;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
-       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
-       switch (reg_subtype) {
-       case KVM_REG_RISCV_CSR_GENERAL:
-               rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val);
-               break;
-       case KVM_REG_RISCV_CSR_AIA:
-               rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val);
-               break;
-       default:
-               rc = -EINVAL;
-               break;
-       }
-       if (rc)
-               return rc;
-
-       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
-                                     const struct kvm_one_reg *reg)
-{
-       int rc;
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_CSR);
-       unsigned long reg_val, reg_subtype;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
-       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
-       switch (reg_subtype) {
-       case KVM_REG_RISCV_CSR_GENERAL:
-               rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val);
-               break;
-       case KVM_REG_RISCV_CSR_AIA:
-               rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val);
-               break;
-       default:
-               rc = -EINVAL;
-               break;
-       }
-       if (rc)
-               return rc;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
-                                         const struct kvm_one_reg *reg)
-{
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_ISA_EXT);
-       unsigned long reg_val = 0;
-       unsigned long host_isa_ext;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
-           reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
-               return -EINVAL;
-
-       host_isa_ext = kvm_isa_ext_arr[reg_num];
-       if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
-               reg_val = 1; /* Mark the given extension as available */
-
-       if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
-                                         const struct kvm_one_reg *reg)
-{
-       unsigned long __user *uaddr =
-                       (unsigned long __user *)(unsigned long)reg->addr;
-       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
-                                           KVM_REG_SIZE_MASK |
-                                           KVM_REG_RISCV_ISA_EXT);
-       unsigned long reg_val;
-       unsigned long host_isa_ext;
-
-       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
-               return -EINVAL;
-
-       if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
-           reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
-               return -EINVAL;
-
-       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
-               return -EFAULT;
-
-       host_isa_ext = kvm_isa_ext_arr[reg_num];
-       if (!__riscv_isa_extension_available(NULL, host_isa_ext))
-               return  -EOPNOTSUPP;
-
-       if (!vcpu->arch.ran_atleast_once) {
-               /*
-                * All multi-letter extension and a few single letter
-                * extension can be disabled
-                */
-               if (reg_val == 1 &&
-                   kvm_riscv_vcpu_isa_enable_allowed(reg_num))
-                       set_bit(host_isa_ext, vcpu->arch.isa);
-               else if (!reg_val &&
-                        kvm_riscv_vcpu_isa_disable_allowed(reg_num))
-                       clear_bit(host_isa_ext, vcpu->arch.isa);
-               else
-                       return -EINVAL;
-               kvm_riscv_vcpu_fp_reset(vcpu);
-       } else {
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg)
-{
-       switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
-       case KVM_REG_RISCV_CONFIG:
-               return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
-       case KVM_REG_RISCV_CORE:
-               return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
-       case KVM_REG_RISCV_CSR:
-               return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
-       case KVM_REG_RISCV_TIMER:
-               return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
-       case KVM_REG_RISCV_FP_F:
-               return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
-                                                KVM_REG_RISCV_FP_F);
-       case KVM_REG_RISCV_FP_D:
-               return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
-                                                KVM_REG_RISCV_FP_D);
-       case KVM_REG_RISCV_ISA_EXT:
-               return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
-       case KVM_REG_RISCV_SBI_EXT:
-               return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
-       case KVM_REG_RISCV_VECTOR:
-               return kvm_riscv_vcpu_set_reg_vector(vcpu, reg,
-                                                KVM_REG_RISCV_VECTOR);
-       default:
-               break;
-       }
-
-       return -EINVAL;
-}
-
-static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg)
-{
-       switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
-       case KVM_REG_RISCV_CONFIG:
-               return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
-       case KVM_REG_RISCV_CORE:
-               return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
-       case KVM_REG_RISCV_CSR:
-               return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
-       case KVM_REG_RISCV_TIMER:
-               return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
-       case KVM_REG_RISCV_FP_F:
-               return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
-                                                KVM_REG_RISCV_FP_F);
-       case KVM_REG_RISCV_FP_D:
-               return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
-                                                KVM_REG_RISCV_FP_D);
-       case KVM_REG_RISCV_ISA_EXT:
-               return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
-       case KVM_REG_RISCV_SBI_EXT:
-               return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
-       case KVM_REG_RISCV_VECTOR:
-               return kvm_riscv_vcpu_get_reg_vector(vcpu, reg,
-                                                KVM_REG_RISCV_VECTOR);
-       default:
-               break;
-       }
-
-       return -EINVAL;
-}
-
 long kvm_arch_vcpu_async_ioctl(struct file *filp,
                               unsigned int ioctl, unsigned long arg)
 {
@@ -781,6 +254,24 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                        r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
                break;
        }
+       case KVM_GET_REG_LIST: {
+               struct kvm_reg_list __user *user_list = argp;
+               struct kvm_reg_list reg_list;
+               unsigned int n;
+
+               r = -EFAULT;
+               if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
+                       break;
+               n = reg_list.n;
+               reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
+               if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
+                       break;
+               r = -E2BIG;
+               if (n < reg_list.n)
+                       break;
+               r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
+               break;
+       }
        default:
                break;
        }
index 9d8cbc4..08ba48a 100644 (file)
@@ -96,7 +96,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
                          reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
                        reg_val = &cntx->fp.f.f[reg_num];
                else
-                       return -EINVAL;
+                       return -ENOENT;
        } else if ((rtype == KVM_REG_RISCV_FP_D) &&
                   riscv_isa_extension_available(vcpu->arch.isa, d)) {
                if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
@@ -109,9 +109,9 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
                                return -EINVAL;
                        reg_val = &cntx->fp.d.f[reg_num];
                } else
-                       return -EINVAL;
+                       return -ENOENT;
        } else
-               return -EINVAL;
+               return -ENOENT;
 
        if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
                return -EFAULT;
@@ -141,7 +141,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
                          reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
                        reg_val = &cntx->fp.f.f[reg_num];
                else
-                       return -EINVAL;
+                       return -ENOENT;
        } else if ((rtype == KVM_REG_RISCV_FP_D) &&
                   riscv_isa_extension_available(vcpu->arch.isa, d)) {
                if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
@@ -154,9 +154,9 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
                                return -EINVAL;
                        reg_val = &cntx->fp.d.f[reg_num];
                } else
-                       return -EINVAL;
+                       return -ENOENT;
        } else
-               return -EINVAL;
+               return -ENOENT;
 
        if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id)))
                return -EFAULT;
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
new file mode 100644 (file)
index 0000000..1b7e9fa
--- /dev/null
@@ -0,0 +1,1051 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2023 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ *     Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/hwcap.h>
+#include <asm/kvm_vcpu_vector.h>
+#include <asm/vector.h>
+
+/* Bits 0..25 of the first ISA bitmap word (used below for single letter extensions). */
+#define KVM_RISCV_BASE_ISA_MASK                GENMASK(25, 0)
+
+/*
+ * Designated-initializer helper: the array index is KVM_RISCV_ISA_EXT_<ext>
+ * and the stored value is RISCV_ISA_EXT_<ext>.
+ */
+#define KVM_ISA_EXT_ARR(ext)           \
+[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
+
+/*
+ * Mapping between KVM ISA Extension ID & Host ISA extension ID.
+ *
+ * Indexed by KVM_RISCV_ISA_EXT_*; each entry holds the matching host
+ * RISCV_ISA_EXT_* value. Because designated initializers are used, the
+ * array length is (largest listed index + 1) and any index that is not
+ * listed is zero-initialised.
+ */
+static const unsigned long kvm_isa_ext_arr[] = {
+       /* Single letter extensions (alphabetically sorted) */
+       [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
+       [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
+       [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
+       [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
+       [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
+       [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
+       [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
+       [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
+       /* Multi letter extensions (alphabetically sorted) */
+       KVM_ISA_EXT_ARR(SSAIA),
+       KVM_ISA_EXT_ARR(SSTC),
+       KVM_ISA_EXT_ARR(SVINVAL),
+       KVM_ISA_EXT_ARR(SVNAPOT),
+       KVM_ISA_EXT_ARR(SVPBMT),
+       KVM_ISA_EXT_ARR(ZBA),
+       KVM_ISA_EXT_ARR(ZBB),
+       KVM_ISA_EXT_ARR(ZBS),
+       KVM_ISA_EXT_ARR(ZICBOM),
+       KVM_ISA_EXT_ARR(ZICBOZ),
+       KVM_ISA_EXT_ARR(ZICNTR),
+       KVM_ISA_EXT_ARR(ZICSR),
+       KVM_ISA_EXT_ARR(ZIFENCEI),
+       KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+       KVM_ISA_EXT_ARR(ZIHPM),
+};
+
+/*
+ * Reverse lookup: translate a host ISA extension ID into the KVM ISA
+ * extension ID that maps to it in kvm_isa_ext_arr[].
+ *
+ * Returns the KVM extension ID, or KVM_RISCV_ISA_EXT_MAX when no entry
+ * matches.
+ */
+static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
+{
+       unsigned long i, nr_ext = ARRAY_SIZE(kvm_isa_ext_arr);
+
+       /*
+        * Never walk past the end of the table: the array is only as long
+        * as its largest designated index, which is not guaranteed to reach
+        * KVM_RISCV_ISA_EXT_MAX. The single-register accessors apply the
+        * same double bound.
+        */
+       if (nr_ext > KVM_RISCV_ISA_EXT_MAX)
+               nr_ext = KVM_RISCV_ISA_EXT_MAX;
+
+       for (i = 0; i < nr_ext; i++) {
+               if (kvm_isa_ext_arr[i] == base_ext)
+                       return i;
+       }
+
+       return KVM_RISCV_ISA_EXT_MAX;
+}
+
+/*
+ * Tell whether the given KVM ISA extension ID may be enabled for a VCPU.
+ *
+ * H is never allowed, V is allowed only when
+ * riscv_v_vstate_ctrl_user_allowed() says so, everything else is allowed.
+ */
+static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
+{
+       if (ext == KVM_RISCV_ISA_EXT_H)
+               return false;
+
+       if (ext == KVM_RISCV_ISA_EXT_V)
+               return riscv_v_vstate_ctrl_user_allowed();
+
+       return true;
+}
+
+/*
+ * Tell whether the given KVM ISA extension ID may be disabled for a VCPU.
+ *
+ * The extensions listed in the switch cannot be disabled; any other ID can.
+ */
+static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
+{
+       bool allowed;
+
+       switch (ext) {
+       case KVM_RISCV_ISA_EXT_A:
+       case KVM_RISCV_ISA_EXT_C:
+       case KVM_RISCV_ISA_EXT_I:
+       case KVM_RISCV_ISA_EXT_M:
+       case KVM_RISCV_ISA_EXT_SSAIA:
+       case KVM_RISCV_ISA_EXT_SSTC:
+       case KVM_RISCV_ISA_EXT_SVINVAL:
+       case KVM_RISCV_ISA_EXT_SVNAPOT:
+       case KVM_RISCV_ISA_EXT_ZBA:
+       case KVM_RISCV_ISA_EXT_ZBB:
+       case KVM_RISCV_ISA_EXT_ZBS:
+       case KVM_RISCV_ISA_EXT_ZICNTR:
+       case KVM_RISCV_ISA_EXT_ZICSR:
+       case KVM_RISCV_ISA_EXT_ZIFENCEI:
+       case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+       case KVM_RISCV_ISA_EXT_ZIHPM:
+               allowed = false;
+               break;
+       default:
+               allowed = true;
+               break;
+       }
+
+       return allowed;
+}
+
+/*
+ * Populate vcpu->arch.isa with every extension from kvm_isa_ext_arr[] that
+ * the host reports as available and that KVM allows to be enabled.
+ */
+void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu)
+{
+       unsigned long kvm_ext;
+
+       for (kvm_ext = 0; kvm_ext < ARRAY_SIZE(kvm_isa_ext_arr); kvm_ext++) {
+               unsigned long host_ext = kvm_isa_ext_arr[kvm_ext];
+
+               if (!__riscv_isa_extension_available(NULL, host_ext))
+                       continue;
+               if (!kvm_riscv_vcpu_isa_enable_allowed(kvm_ext))
+                       continue;
+
+               set_bit(host_ext, vcpu->arch.isa);
+       }
+}
+
+/*
+ * Read one KVM_REG_RISCV_CONFIG register and copy its value to user space.
+ *
+ * Returns 0 on success, -EINVAL if the encoded register size is not
+ * sizeof(unsigned long), -ENOENT for an unknown register or for a block
+ * size register whose extension is absent from the VCPU ISA, and -EFAULT
+ * if the copy to user space fails.
+ */
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+                                        const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_CONFIG);
+       unsigned long val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       if (reg_num == KVM_REG_RISCV_CONFIG_REG(isa)) {
+               /* Only the single letter extension bits are exposed here */
+               val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
+       } else if (reg_num == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)) {
+               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+                       return -ENOENT;
+               val = riscv_cbom_block_size;
+       } else if (reg_num == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)) {
+               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+                       return -ENOENT;
+               val = riscv_cboz_block_size;
+       } else if (reg_num == KVM_REG_RISCV_CONFIG_REG(mvendorid)) {
+               val = vcpu->arch.mvendorid;
+       } else if (reg_num == KVM_REG_RISCV_CONFIG_REG(marchid)) {
+               val = vcpu->arch.marchid;
+       } else if (reg_num == KVM_REG_RISCV_CONFIG_REG(mimpid)) {
+               val = vcpu->arch.mimpid;
+       } else if (reg_num == KVM_REG_RISCV_CONFIG_REG(satp_mode)) {
+               val = satp_mode >> SATP_MODE_SHIFT;
+       } else {
+               return -ENOENT;
+       }
+
+       return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
+}
+
+/*
+ * Write one KVM_REG_RISCV_CONFIG register from a user-space value.
+ *
+ * Writing the value a register already holds is accepted as a no-op.
+ * isa, mvendorid, marchid and mimpid can only be changed before the VCPU
+ * has run (-EBUSY afterwards). The block size and satp_mode registers only
+ * accept the value the corresponding getter reports (-EINVAL otherwise).
+ *
+ * Returns 0 on success, -EINVAL for a bad size or rejected value, -EFAULT
+ * if the user copy fails, -ENOENT for an unknown register or a block size
+ * register whose extension is absent from the VCPU ISA, -EBUSY as above.
+ */
+static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
+                                        const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_CONFIG);
+       unsigned long i, isa_ext, reg_val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       switch (reg_num) {
+       case KVM_REG_RISCV_CONFIG_REG(isa):
+               /*
+                * This ONE REG interface is only defined for
+                * single letter extensions.
+                *
+                * NOTE(review): reg_val is an unsigned long; if fls() takes
+                * an unsigned int here, bits above 31 would not be seen by
+                * this check on 64-bit - confirm whether fls_long() is
+                * intended.
+                */
+               if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
+                       return -EINVAL;
+
+               /*
+                * Return early (i.e. do nothing) if reg_val is the same
+                * value retrievable via kvm_riscv_vcpu_get_reg_config().
+                */
+               if (reg_val == (vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK))
+                       break;
+
+               if (!vcpu->arch.ran_atleast_once) {
+                       /* Ignore the enable/disable request for certain extensions */
+                       for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
+                               isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
+                               /* Host extension with no KVM mapping: force its bit off */
+                               if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
+                                       reg_val &= ~BIT(i);
+                                       continue;
+                               }
+                               /* Extension that may not be enabled: force its bit off */
+                               if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
+                                       if (reg_val & BIT(i))
+                                               reg_val &= ~BIT(i);
+                               /* Extension that may not be disabled: force its bit on */
+                               if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
+                                       if (!(reg_val & BIT(i)))
+                                               reg_val |= BIT(i);
+                       }
+                       /* Keep only what riscv_isa_extension_base(NULL) reports */
+                       reg_val &= riscv_isa_extension_base(NULL);
+                       /* Do not modify anything beyond single letter extensions */
+                       reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
+                                 (reg_val & KVM_RISCV_BASE_ISA_MASK);
+                       vcpu->arch.isa[0] = reg_val;
+                       kvm_riscv_vcpu_fp_reset(vcpu);
+               } else {
+                       return -EBUSY;
+               }
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+                       return -ENOENT;
+               if (reg_val != riscv_cbom_block_size)
+                       return -EINVAL;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+               if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+                       return -ENOENT;
+               if (reg_val != riscv_cboz_block_size)
+                       return -EINVAL;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+               if (reg_val == vcpu->arch.mvendorid)
+                       break;
+               if (!vcpu->arch.ran_atleast_once)
+                       vcpu->arch.mvendorid = reg_val;
+               else
+                       return -EBUSY;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(marchid):
+               if (reg_val == vcpu->arch.marchid)
+                       break;
+               if (!vcpu->arch.ran_atleast_once)
+                       vcpu->arch.marchid = reg_val;
+               else
+                       return -EBUSY;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(mimpid):
+               if (reg_val == vcpu->arch.mimpid)
+                       break;
+               if (!vcpu->arch.ran_atleast_once)
+                       vcpu->arch.mimpid = reg_val;
+               else
+                       return -EBUSY;
+               break;
+       case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+               if (reg_val != (satp_mode >> SATP_MODE_SHIFT))
+                       return -EINVAL;
+               break;
+       default:
+               return -ENOENT;
+       }
+
+       return 0;
+}
+
+/*
+ * Read one KVM_REG_RISCV_CORE register and copy its value to user space.
+ *
+ * regs.pc is served from the guest context sepc, the registers after pc up
+ * to regs.t6 are read from the guest context by index, and mode is derived
+ * from the SR_SPP bit of the guest sstatus.
+ *
+ * Returns 0 on success, -EINVAL for a bad size, -ENOENT for an unknown
+ * register and -EFAULT if the copy to user space fails.
+ */
+static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
+                                      const struct kvm_one_reg *reg)
+{
+       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_CORE);
+       unsigned long val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+       if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+               return -ENOENT;
+
+       if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) {
+               val = cntx->sepc;
+       } else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+                  reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6)) {
+               val = ((unsigned long *)cntx)[reg_num];
+       } else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
+               if (cntx->sstatus & SR_SPP)
+                       val = KVM_RISCV_MODE_S;
+               else
+                       val = KVM_RISCV_MODE_U;
+       } else {
+               return -ENOENT;
+       }
+
+       return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
+}
+
+/*
+ * Write one KVM_REG_RISCV_CORE register from a user-space value.
+ *
+ * regs.pc is stored in the guest context sepc, the registers after pc up
+ * to regs.t6 are written into the guest context by index, and mode sets
+ * SR_SPP in the guest sstatus for KVM_RISCV_MODE_S and clears it for any
+ * other value.
+ *
+ * Returns 0 on success, -EINVAL for a bad size, -ENOENT for an unknown
+ * register and -EFAULT if the copy from user space fails.
+ */
+static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
+                                      const struct kvm_one_reg *reg)
+{
+       struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_CORE);
+       unsigned long val;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+       if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+               return -ENOENT;
+
+       if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc)) {
+               cntx->sepc = val;
+               return 0;
+       }
+
+       if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+           reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6)) {
+               ((unsigned long *)cntx)[reg_num] = val;
+               return 0;
+       }
+
+       if (reg_num != KVM_REG_RISCV_CORE_REG(mode))
+               return -ENOENT;
+
+       if (val == KVM_RISCV_MODE_S)
+               cntx->sstatus |= SR_SPP;
+       else
+               cntx->sstatus &= ~SR_SPP;
+
+       return 0;
+}
+
+/*
+ * Fetch one general guest CSR, selected by its index in struct
+ * kvm_riscv_csr, into *out_val.
+ *
+ * sip is synthesised from the guest hvip after flushing pending
+ * interrupts; every other register is read from the guest CSR state by
+ * index.
+ *
+ * Returns 0 on success or -ENOENT when reg_num is out of range.
+ */
+static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
+                                         unsigned long reg_num,
+                                         unsigned long *out_val)
+{
+       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+       unsigned long sip;
+
+       if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+               return -ENOENT;
+
+       if (reg_num != KVM_REG_RISCV_CSR_REG(sip)) {
+               *out_val = ((unsigned long *)csr)[reg_num];
+               return 0;
+       }
+
+       kvm_riscv_vcpu_flush_interrupts(vcpu);
+       sip = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
+       sip |= csr->hvip & ~IRQ_LOCAL_MASK;
+       *out_val = sip;
+
+       return 0;
+}
+
+/*
+ * Store reg_val into one general guest CSR, selected by its index in
+ * struct kvm_riscv_csr.
+ *
+ * For sip the value is masked with VSIP_VALID_MASK and shifted by
+ * VSIP_TO_HVIP_SHIFT before being stored, and the first word of the
+ * pending-interrupt mask is cleared afterwards.
+ *
+ * Returns 0 on success or -ENOENT when reg_num is out of range.
+ */
+static int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
+                                         unsigned long reg_num,
+                                         unsigned long reg_val)
+{
+       struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+       bool is_sip;
+
+       if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+               return -ENOENT;
+
+       is_sip = (reg_num == KVM_REG_RISCV_CSR_REG(sip));
+       if (is_sip)
+               reg_val = (reg_val & VSIP_VALID_MASK) << VSIP_TO_HVIP_SHIFT;
+
+       ((unsigned long *)csr)[reg_num] = reg_val;
+
+       if (is_sip)
+               WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
+
+       return 0;
+}
+
+/*
+ * Read one KVM_REG_RISCV_CSR register and copy its value to user space.
+ *
+ * The register subtype selects the general or the AIA CSR getter.
+ *
+ * Returns 0 on success, -EINVAL for a bad size, -ENOENT for an unknown
+ * subtype, any error from the subtype getter, or -EFAULT if the copy to
+ * user space fails.
+ */
+static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
+                                     const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_CSR);
+       unsigned long val, subtype;
+       int err;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       /* Split the subtype bits from the register index */
+       subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       if (subtype == KVM_REG_RISCV_CSR_GENERAL)
+               err = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &val);
+       else if (subtype == KVM_REG_RISCV_CSR_AIA)
+               err = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &val);
+       else
+               err = -ENOENT;
+
+       if (err)
+               return err;
+
+       return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
+}
+
+/*
+ * Write one KVM_REG_RISCV_CSR register from a user-space value.
+ *
+ * The register subtype selects the general or the AIA CSR setter.
+ *
+ * Returns 0 on success, -EINVAL for a bad size, -EFAULT if the copy from
+ * user space fails, -ENOENT for an unknown subtype, or any error from the
+ * subtype setter.
+ */
+static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
+                                     const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_CSR);
+       unsigned long val, subtype;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       /* Split the subtype bits from the register index */
+       subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       if (subtype == KVM_REG_RISCV_CSR_GENERAL)
+               return kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, val);
+       if (subtype == KVM_REG_RISCV_CSR_AIA)
+               return kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, val);
+
+       return -ENOENT;
+}
+
+/*
+ * Report whether a single KVM ISA extension is present in the VCPU ISA.
+ *
+ * On success *reg_val is 1 when the extension is available and 0 when it
+ * is not. Returns -ENOENT (leaving *reg_val untouched) when reg_num is
+ * outside the extension table.
+ */
+static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu,
+                                        unsigned long reg_num,
+                                        unsigned long *reg_val)
+{
+       if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+           reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+               return -ENOENT;
+
+       if (__riscv_isa_extension_available(vcpu->arch.isa,
+                                           kvm_isa_ext_arr[reg_num]))
+               *reg_val = 1; /* Mark the given extension as available */
+       else
+               *reg_val = 0;
+
+       return 0;
+}
+
+/*
+ * Enable (reg_val == 1) or disable (reg_val == 0) a single KVM ISA
+ * extension in the VCPU ISA.
+ *
+ * Returns 0 on success or when the extension is already in the requested
+ * state, -ENOENT when reg_num is outside the extension table or the host
+ * lacks the extension, -EBUSY when the VCPU has already run, and -EINVAL
+ * when the requested change is not permitted.
+ */
+static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
+                                        unsigned long reg_num,
+                                        unsigned long reg_val)
+{
+       unsigned long host_ext;
+
+       if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+           reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+               return -ENOENT;
+
+       host_ext = kvm_isa_ext_arr[reg_num];
+       if (!__riscv_isa_extension_available(NULL, host_ext))
+               return -ENOENT;
+
+       /* Nothing to do when the requested state is the current state */
+       if (reg_val == test_bit(host_ext, vcpu->arch.isa))
+               return 0;
+
+       if (vcpu->arch.ran_atleast_once)
+               return -EBUSY;
+
+       /*
+        * All multi-letter extensions and a few single letter
+        * extensions can be disabled
+        */
+       if (reg_val == 1 && kvm_riscv_vcpu_isa_enable_allowed(reg_num))
+               set_bit(host_ext, vcpu->arch.isa);
+       else if (!reg_val && kvm_riscv_vcpu_isa_disable_allowed(reg_num))
+               clear_bit(host_ext, vcpu->arch.isa);
+       else
+               return -EINVAL;
+
+       kvm_riscv_vcpu_fp_reset(vcpu);
+
+       return 0;
+}
+
+/*
+ * OR into *reg_val one bit for every available extension among the
+ * BITS_PER_LONG extension IDs covered by multi register reg_num.
+ *
+ * *reg_val is only ever OR-ed into, so the caller must initialise it.
+ * Returns 0, or -ENOENT when reg_num is beyond the last multi register.
+ */
+static int riscv_vcpu_get_isa_ext_multi(struct kvm_vcpu *vcpu,
+                                       unsigned long reg_num,
+                                       unsigned long *reg_val)
+{
+       unsigned long bit;
+
+       if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+               return -ENOENT;
+
+       for (bit = 0; bit < BITS_PER_LONG; bit++) {
+               unsigned long ext_id = bit + reg_num * BITS_PER_LONG;
+               unsigned long present = 0;
+
+               if (ext_id >= KVM_RISCV_ISA_EXT_MAX)
+                       break;
+
+               /* An error leaves 'present' at 0, i.e. extension not set */
+               riscv_vcpu_get_isa_ext_single(vcpu, ext_id, &present);
+               if (present)
+                       *reg_val |= KVM_REG_RISCV_ISA_MULTI_MASK(ext_id);
+       }
+
+       return 0;
+}
+
+/*
+ * Enable or disable (per 'enable') every extension whose bit is set in
+ * reg_val, for the BITS_PER_LONG extension IDs covered by multi register
+ * reg_num.
+ *
+ * The result of each per-extension update is not checked. Returns 0, or
+ * -ENOENT when reg_num is beyond the last multi register.
+ */
+static int riscv_vcpu_set_isa_ext_multi(struct kvm_vcpu *vcpu,
+                                       unsigned long reg_num,
+                                       unsigned long reg_val, bool enable)
+{
+       unsigned long bit;
+
+       if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+               return -ENOENT;
+
+       for_each_set_bit(bit, &reg_val, BITS_PER_LONG) {
+               unsigned long ext_id = bit + reg_num * BITS_PER_LONG;
+
+               if (ext_id >= KVM_RISCV_ISA_EXT_MAX)
+                       break;
+
+               riscv_vcpu_set_isa_ext_single(vcpu, ext_id, enable);
+       }
+
+       return 0;
+}
+
+/*
+ * Read one KVM_REG_RISCV_ISA_EXT register and copy its value to user space.
+ *
+ * The SINGLE subtype reports one extension as 0/1. MULTI_EN reports a
+ * bitmap of available extensions and MULTI_DIS reports the bitwise
+ * complement of that bitmap.
+ *
+ * Returns 0 on success, -EINVAL for a bad size, -ENOENT for an unknown
+ * subtype or register, or -EFAULT if the copy to user space fails.
+ */
+static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
+                                         const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_ISA_EXT);
+       unsigned long val = 0, subtype;
+       int err;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       /* Split the subtype bits from the register index */
+       subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       if (subtype == KVM_REG_RISCV_ISA_SINGLE) {
+               err = riscv_vcpu_get_isa_ext_single(vcpu, reg_num, &val);
+       } else if (subtype == KVM_REG_RISCV_ISA_MULTI_EN ||
+                  subtype == KVM_REG_RISCV_ISA_MULTI_DIS) {
+               err = riscv_vcpu_get_isa_ext_multi(vcpu, reg_num, &val);
+               if (!err && subtype == KVM_REG_RISCV_ISA_MULTI_DIS)
+                       val = ~val;
+       } else {
+               err = -ENOENT;
+       }
+
+       if (err)
+               return err;
+
+       return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
+}
+
+/*
+ * Write one KVM_REG_RISCV_ISA_EXT register from a user-space value.
+ *
+ * The SINGLE subtype enables/disables one extension. MULTI_EN enables and
+ * MULTI_DIS disables every extension whose bit is set in the value.
+ *
+ * Returns 0 on success, -EINVAL for a bad size, -EFAULT if the copy from
+ * user space fails, -ENOENT for an unknown subtype, or any error from the
+ * subtype handler.
+ */
+static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
+                                         const struct kvm_one_reg *reg)
+{
+       unsigned long __user *uaddr =
+                       (unsigned long __user *)(unsigned long)reg->addr;
+       unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+                                           KVM_REG_SIZE_MASK |
+                                           KVM_REG_RISCV_ISA_EXT);
+       unsigned long reg_val, reg_subtype;
+
+       if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+               return -EINVAL;
+
+       reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+       reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+               return -EFAULT;
+
+       /*
+        * Use the ISA_EXT subtype names, matching the getter; the SBI_EXT
+        * subtype macros belong to a different register class.
+        */
+       switch (reg_subtype) {
+       case KVM_REG_RISCV_ISA_SINGLE:
+               return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
+       case KVM_REG_RISCV_ISA_MULTI_EN:
+               return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
+       case KVM_REG_RISCV_ISA_MULTI_DIS:
+               return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
+       default:
+               return -ENOENT;
+       }
+}
+
+/*
+ * Emit the ONE_REG IDs of the config registers visible on this VCPU.
+ *
+ * When uindices is NULL nothing is written and the registers are only
+ * counted. Returns the number of registers, or -EFAULT if a write to
+ * user space fails.
+ */
+static int copy_config_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       u64 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+       int count = 0;
+
+       for (int i = 0; i < sizeof(struct kvm_riscv_config)/sizeof(unsigned long);
+                i++) {
+               u64 reg;
+
+               /*
+                * Avoid reporting config reg if the corresponding extension
+                * was not available.
+                */
+               if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) &&
+                   !riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+                       continue;
+               if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) &&
+                   !riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+                       continue;
+
+               reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CONFIG | i;
+
+               if (uindices) {
+                       if (put_user(reg, uindices))
+                               return -EFAULT;
+                       uindices++;
+               }
+
+               count++;
+       }
+
+       return count;
+}
+
+/* Count the config registers by running the index copier in count-only mode. */
+static unsigned long num_config_regs(const struct kvm_vcpu *vcpu)
+{
+       u64 __user *no_output = NULL;
+
+       return copy_config_reg_indices(vcpu, no_output);
+}
+
+/* Number of unsigned long slots in struct kvm_riscv_core. */
+static inline unsigned long num_core_regs(void)
+{
+       unsigned long nr = sizeof(struct kvm_riscv_core) / sizeof(unsigned long);
+
+       return nr;
+}
+
+/*
+ * Emit the ONE_REG IDs of all core registers.
+ *
+ * When uindices is NULL nothing is written. Returns the number of core
+ * registers, or -EFAULT if a write to user space fails.
+ */
+static int copy_core_reg_indices(u64 __user *uindices)
+{
+       u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                  KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+       int nr = num_core_regs();
+       int i;
+
+       if (!uindices)
+               return nr;
+
+       for (i = 0; i < nr; i++) {
+               u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CORE | i;
+
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       return nr;
+}
+
+/*
+ * Number of CSR registers: the general CSRs, plus the AIA CSRs when SSAIA
+ * is part of the VCPU ISA.
+ */
+static inline unsigned long num_csr_regs(const struct kvm_vcpu *vcpu)
+{
+       unsigned long general = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long);
+       unsigned long aia = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long);
+
+       if (!riscv_isa_extension_available(vcpu->arch.isa, SSAIA))
+               return general;
+
+       return general + aia;
+}
+
+/*
+ * Emit the ONE_REG IDs of the general CSRs and, when SSAIA is part of the
+ * VCPU ISA, of the AIA CSRs.
+ *
+ * When uindices is NULL nothing is written. Returns the number of CSR
+ * registers, or -EFAULT if a write to user space fails.
+ */
+static int copy_csr_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                  KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+       int nr_general = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long);
+       int nr_aia = 0;
+       int i;
+
+       /* copy general csr regs */
+       for (i = 0; i < nr_general; i++) {
+               u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR |
+                         KVM_REG_RISCV_CSR_GENERAL | i;
+
+               if (!uindices)
+                       continue;
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       if (!riscv_isa_extension_available(vcpu->arch.isa, SSAIA))
+               return nr_general + nr_aia;
+
+       /* copy AIA csr regs */
+       nr_aia = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long);
+       for (i = 0; i < nr_aia; i++) {
+               u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR |
+                         KVM_REG_RISCV_CSR_AIA | i;
+
+               if (!uindices)
+                       continue;
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       return nr_general + nr_aia;
+}
+
+/* Number of u64 slots in struct kvm_riscv_timer. */
+static inline unsigned long num_timer_regs(void)
+{
+       unsigned long nr = sizeof(struct kvm_riscv_timer) / sizeof(u64);
+
+       return nr;
+}
+
+/*
+ * Emit the ONE_REG IDs of all timer registers (always 64-bit sized).
+ *
+ * When uindices is NULL nothing is written. Returns the number of timer
+ * registers, or -EFAULT if a write to user space fails.
+ */
+static int copy_timer_reg_indices(u64 __user *uindices)
+{
+       int nr = num_timer_regs();
+       int i;
+
+       if (!uindices)
+               return nr;
+
+       for (i = 0; i < nr; i++) {
+               u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+                         KVM_REG_RISCV_TIMER | i;
+
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       return nr;
+}
+
+/*
+ * Number of FP_F registers: the u32 slots of the guest fp.f state when F
+ * is part of the VCPU ISA, otherwise 0.
+ */
+static inline unsigned long num_fp_f_regs(const struct kvm_vcpu *vcpu)
+{
+       const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+       if (!riscv_isa_extension_available(vcpu->arch.isa, f))
+               return 0;
+
+       return sizeof(cntx->fp.f) / sizeof(u32);
+}
+
+/*
+ * Emit the ONE_REG IDs of the FP_F registers (all 32-bit sized).
+ *
+ * When uindices is NULL nothing is written. Returns the number of FP_F
+ * registers (0 without F), or -EFAULT if a write to user space fails.
+ */
+static int copy_fp_f_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       int nr = num_fp_f_regs(vcpu);
+       int i;
+
+       if (!uindices)
+               return nr;
+
+       for (i = 0; i < nr; i++) {
+               u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 |
+                         KVM_REG_RISCV_FP_F | i;
+
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       return nr;
+}
+
+/*
+ * Number of FP_D registers: the u64 slots of the guest fp.d.f array plus
+ * one more register when D is part of the VCPU ISA, otherwise 0.
+ */
+static inline unsigned long num_fp_d_regs(const struct kvm_vcpu *vcpu)
+{
+       const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+       if (!riscv_isa_extension_available(vcpu->arch.isa, d))
+               return 0;
+
+       return sizeof(cntx->fp.d.f) / sizeof(u64) + 1;
+}
+
+/*
+ * Emit the ONE_REG IDs of the FP_D registers: the 64-bit fp.d.f entries
+ * followed by the 32-bit fp.d.fcsr entry.
+ *
+ * When uindices is NULL nothing is written. Returns the number of FP_D
+ * registers (0 without D), or -EFAULT if a write to user space fails.
+ */
+static int copy_fp_d_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       int i;
+       int n = num_fp_d_regs(vcpu);
+       u64 reg;
+
+       /*
+        * Without D there are no FP_D registers at all: do not emit the
+        * fcsr index below while reporting a count of zero.
+        */
+       if (!n)
+               return 0;
+
+       /* copy fp.d.f indices */
+       for (i = 0; i < n-1; i++) {
+               reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+                     KVM_REG_RISCV_FP_D | i;
+
+               if (uindices) {
+                       if (put_user(reg, uindices))
+                               return -EFAULT;
+                       uindices++;
+               }
+       }
+
+       /* copy fp.d.fcsr indices */
+       reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | i;
+       if (uindices) {
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       return n;
+}
+
+/*
+ * Emit the ONE_REG IDs of the ISA extension registers whose extension is
+ * present in the VCPU ISA.
+ *
+ * When uindices is NULL nothing is written and the registers are only
+ * counted. Returns the number of registers, or -EFAULT if a write to
+ * user space fails.
+ */
+static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
+                               u64 __user *uindices)
+{
+       unsigned int n = 0;
+       unsigned long isa_ext;
+
+       /*
+        * Bound the walk by the table length as well as by
+        * KVM_RISCV_ISA_EXT_MAX, as the single-register accessors do, so
+        * kvm_isa_ext_arr[] is never read past its end.
+        */
+       for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX &&
+                       i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
+               u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                          KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+               u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
+
+               isa_ext = kvm_isa_ext_arr[i];
+               if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext))
+                       continue;
+
+               if (uindices) {
+                       if (put_user(reg, uindices))
+                               return -EFAULT;
+                       uindices++;
+               }
+
+               n++;
+       }
+
+       return n;
+}
+
+/* Count the ISA extension registers by running the index copier in count-only mode. */
+static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
+{
+       return copy_isa_ext_reg_indices(vcpu, NULL);
+}
+
+/*
+ * Number of SBI extension registers: one KVM_REG_RISCV_SBI_SINGLE register
+ * per extension plus an enable/disable pair for every
+ * KVM_REG_RISCV_SBI_MULTI register.
+ */
+static inline unsigned long num_sbi_ext_regs(void)
+{
+       unsigned long nr_single = KVM_RISCV_SBI_EXT_MAX;
+       unsigned long nr_multi = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1;
+
+       return nr_single + 2 * nr_multi;
+}
+
+/*
+ * Emit the ONE_REG IDs of the SBI extension registers: all SINGLE
+ * registers first, then a MULTI_EN/MULTI_DIS pair per multi register.
+ *
+ * When uindices is NULL nothing is written. Returns num_sbi_ext_regs(),
+ * or -EFAULT if a write to user space fails.
+ */
+static int copy_sbi_ext_reg_indices(u64 __user *uindices)
+{
+       u64 size = IS_ENABLED(CONFIG_32BIT) ?
+                  KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+       u64 base = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT;
+       int nr_single = KVM_RISCV_SBI_EXT_MAX;
+       int nr_multi = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1;
+       int i;
+
+       if (!uindices)
+               return num_sbi_ext_regs();
+
+       /* copy KVM_REG_RISCV_SBI_SINGLE */
+       for (i = 0; i < nr_single; i++) {
+               u64 reg = base | KVM_REG_RISCV_SBI_SINGLE | i;
+
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       /* copy KVM_REG_RISCV_SBI_MULTI */
+       for (i = 0; i < nr_multi; i++) {
+               u64 reg = base | KVM_REG_RISCV_SBI_MULTI_EN | i;
+
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+
+               reg = base | KVM_REG_RISCV_SBI_MULTI_DIS | i;
+
+               if (put_user(reg, uindices))
+                       return -EFAULT;
+               uindices++;
+       }
+
+       return num_sbi_ext_regs();
+}
+
+/*
+ * kvm_riscv_vcpu_num_regs - total number of register ids reported for @vcpu
+ *
+ * Adds up the per-group counts: config, core, CSR, timer, FP (F and D),
+ * ISA-extension and SBI-extension registers.
+ *
+ * NOTE(review): kvm_riscv_vcpu_get_reg()/kvm_riscv_vcpu_set_reg() also
+ * dispatch KVM_REG_RISCV_VECTOR, which is not counted here - confirm that
+ * this is intentional.
+ */
+unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu)
+{
+       unsigned long total;
+
+       total = num_config_regs(vcpu);
+       total += num_core_regs();
+       total += num_csr_regs(vcpu);
+       total += num_timer_regs();
+       total += num_fp_f_regs(vcpu);
+       total += num_fp_d_regs(vcpu);
+       total += num_isa_ext_regs(vcpu);
+       total += num_sbi_ext_regs();
+
+       return total;
+}
+
+/*
+ * kvm_riscv_vcpu_copy_reg_indices - write the ids of all listed registers
+ * @vcpu:     vcpu whose register list is produced
+ * @uindices: user buffer receiving the ids
+ *
+ * Runs the per-group copy helpers in a fixed order (config, core, CSR,
+ * timer, FP F, FP D, ISA extensions, SBI extensions). Each helper's
+ * non-negative return value is used to advance @uindices; a negative one
+ * is returned to the caller unchanged.
+ *
+ * Returns 0 once every group has been copied.
+ */
+int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
+                                   u64 __user *uindices)
+{
+       int nr;
+
+       nr = copy_config_reg_indices(vcpu, uindices);
+       if (nr < 0)
+               return nr;
+       uindices += nr;
+
+       nr = copy_core_reg_indices(uindices);
+       if (nr < 0)
+               return nr;
+       uindices += nr;
+
+       nr = copy_csr_reg_indices(vcpu, uindices);
+       if (nr < 0)
+               return nr;
+       uindices += nr;
+
+       nr = copy_timer_reg_indices(uindices);
+       if (nr < 0)
+               return nr;
+       uindices += nr;
+
+       nr = copy_fp_f_reg_indices(vcpu, uindices);
+       if (nr < 0)
+               return nr;
+       uindices += nr;
+
+       nr = copy_fp_d_reg_indices(vcpu, uindices);
+       if (nr < 0)
+               return nr;
+       uindices += nr;
+
+       nr = copy_isa_ext_reg_indices(vcpu, uindices);
+       if (nr < 0)
+               return nr;
+       uindices += nr;
+
+       /* Last group: no need to advance the cursor afterwards. */
+       nr = copy_sbi_ext_reg_indices(uindices);
+
+       return nr < 0 ? nr : 0;
+}
+
+/*
+ * kvm_riscv_vcpu_set_reg - route a register write to its group handler
+ * @vcpu: target vcpu
+ * @reg:  register descriptor; the bits of @reg->id selected by
+ *        KVM_REG_RISCV_TYPE_MASK pick the handler
+ *
+ * Returns the handler's result, or -ENOENT when no handler matches.
+ */
+int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+                          const struct kvm_one_reg *reg)
+{
+       u64 group = reg->id & KVM_REG_RISCV_TYPE_MASK;
+
+       switch (group) {
+       case KVM_REG_RISCV_CONFIG:
+               return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
+       case KVM_REG_RISCV_CORE:
+               return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
+       case KVM_REG_RISCV_CSR:
+               return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
+       case KVM_REG_RISCV_TIMER:
+               return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
+       case KVM_REG_RISCV_FP_F:
+               return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+                                                KVM_REG_RISCV_FP_F);
+       case KVM_REG_RISCV_FP_D:
+               return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+                                                KVM_REG_RISCV_FP_D);
+       case KVM_REG_RISCV_ISA_EXT:
+               return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
+       case KVM_REG_RISCV_SBI_EXT:
+               return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
+       case KVM_REG_RISCV_VECTOR:
+               return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
+       default:
+               /* Unknown register group. */
+               return -ENOENT;
+       }
+}
+
+/*
+ * kvm_riscv_vcpu_get_reg - route a register read to its group handler
+ * @vcpu: source vcpu
+ * @reg:  register descriptor; the bits of @reg->id selected by
+ *        KVM_REG_RISCV_TYPE_MASK pick the handler
+ *
+ * Returns the handler's result, or -ENOENT when no handler matches.
+ */
+int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+                          const struct kvm_one_reg *reg)
+{
+       u64 group = reg->id & KVM_REG_RISCV_TYPE_MASK;
+
+       switch (group) {
+       case KVM_REG_RISCV_CONFIG:
+               return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
+       case KVM_REG_RISCV_CORE:
+               return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
+       case KVM_REG_RISCV_CSR:
+               return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
+       case KVM_REG_RISCV_TIMER:
+               return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
+       case KVM_REG_RISCV_FP_F:
+               return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+                                                KVM_REG_RISCV_FP_F);
+       case KVM_REG_RISCV_FP_D:
+               return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+                                                KVM_REG_RISCV_FP_D);
+       case KVM_REG_RISCV_ISA_EXT:
+               return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
+       case KVM_REG_RISCV_SBI_EXT:
+               return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
+       case KVM_REG_RISCV_VECTOR:
+               return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
+       default:
+               /* Unknown register group. */
+               return -ENOENT;
+       }
+}
index 7b46e04..9cd9709 100644 (file)
@@ -140,8 +140,10 @@ static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu,
        const struct kvm_riscv_sbi_extension_entry *sext = NULL;
        struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
 
-       if (reg_num >= KVM_RISCV_SBI_EXT_MAX ||
-           (reg_val != 1 && reg_val != 0))
+       if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
+               return -ENOENT;
+
+       if (reg_val != 1 && reg_val != 0)
                return -EINVAL;
 
        for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
@@ -175,7 +177,7 @@ static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu,
        struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
 
        if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
-               return -EINVAL;
+               return -ENOENT;
 
        for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
                if (sbi_ext[i].ext_idx == reg_num) {
@@ -206,7 +208,7 @@ static int riscv_vcpu_set_sbi_ext_multi(struct kvm_vcpu *vcpu,
        unsigned long i, ext_id;
 
        if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
-               return -EINVAL;
+               return -ENOENT;
 
        for_each_set_bit(i, &reg_val, BITS_PER_LONG) {
                ext_id = i + reg_num * BITS_PER_LONG;
@@ -226,7 +228,7 @@ static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu,
        unsigned long i, ext_id, ext_val;
 
        if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
-               return -EINVAL;
+               return -ENOENT;
 
        for (i = 0; i < BITS_PER_LONG; i++) {
                ext_id = i + reg_num * BITS_PER_LONG;
@@ -272,7 +274,7 @@ int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu,
        case KVM_REG_RISCV_SBI_MULTI_DIS:
                return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false);
        default:
-               return -EINVAL;
+               return -ENOENT;
        }
 
        return 0;
@@ -307,7 +309,7 @@ int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu,
                        reg_val = ~reg_val;
                break;
        default:
-               rc = -EINVAL;
+               rc = -ENOENT;
        }
        if (rc)
                return rc;
index 3ac2ff6..75486b2 100644 (file)
@@ -170,7 +170,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
        if (KVM_REG_SIZE(reg->id) != sizeof(u64))
                return -EINVAL;
        if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
-               return -EINVAL;
+               return -ENOENT;
 
        switch (reg_num) {
        case KVM_REG_RISCV_TIMER_REG(frequency):
@@ -187,7 +187,7 @@ int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
                                          KVM_RISCV_TIMER_STATE_OFF;
                break;
        default:
-               return -EINVAL;
+               return -ENOENT;
        }
 
        if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
@@ -211,14 +211,15 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
        if (KVM_REG_SIZE(reg->id) != sizeof(u64))
                return -EINVAL;
        if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
-               return -EINVAL;
+               return -ENOENT;
 
        if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
                return -EFAULT;
 
        switch (reg_num) {
        case KVM_REG_RISCV_TIMER_REG(frequency):
-               ret = -EOPNOTSUPP;
+               if (reg_val != riscv_timebase)
+                       return -EINVAL;
                break;
        case KVM_REG_RISCV_TIMER_REG(time):
                gt->time_delta = reg_val - get_cycles64();
@@ -233,7 +234,7 @@ int kvm_riscv_vcpu_set_reg_timer(struct kvm_vcpu *vcpu,
                        ret = kvm_riscv_vcpu_timer_cancel(t);
                break;
        default:
-               ret = -EINVAL;
+               ret = -ENOENT;
                break;
        }
 
index edd2eec..b430cbb 100644 (file)
@@ -91,95 +91,93 @@ void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
 }
 #endif
 
-static void *kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
-                                     unsigned long reg_num,
-                                     size_t reg_size)
+static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
+                                   unsigned long reg_num,
+                                   size_t reg_size,
+                                   void **reg_addr)
 {
        struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
-       void *reg_val;
        size_t vlenb = riscv_v_vsize / 32;
 
        if (reg_num < KVM_REG_RISCV_VECTOR_REG(0)) {
                if (reg_size != sizeof(unsigned long))
-                       return NULL;
+                       return -EINVAL;
                switch (reg_num) {
                case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
-                       reg_val = &cntx->vector.vstart;
+                       *reg_addr = &cntx->vector.vstart;
                        break;
                case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
-                       reg_val = &cntx->vector.vl;
+                       *reg_addr = &cntx->vector.vl;
                        break;
                case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
-                       reg_val = &cntx->vector.vtype;
+                       *reg_addr = &cntx->vector.vtype;
                        break;
                case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
-                       reg_val = &cntx->vector.vcsr;
+                       *reg_addr = &cntx->vector.vcsr;
                        break;
                case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
                default:
-                       return NULL;
+                       return -ENOENT;
                }
        } else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
                if (reg_size != vlenb)
-                       return NULL;
-               reg_val = cntx->vector.datap
-                         + (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
+                       return -EINVAL;
+               *reg_addr = cntx->vector.datap +
+                           (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
        } else {
-               return NULL;
+               return -ENOENT;
        }
 
-       return reg_val;
+       return 0;
 }
 
 int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg,
-                                 unsigned long rtype)
+                                 const struct kvm_one_reg *reg)
 {
        unsigned long *isa = vcpu->arch.isa;
        unsigned long __user *uaddr =
                        (unsigned long __user *)(unsigned long)reg->addr;
        unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
                                            KVM_REG_SIZE_MASK |
-                                           rtype);
-       void *reg_val = NULL;
+                                           KVM_REG_RISCV_VECTOR);
        size_t reg_size = KVM_REG_SIZE(reg->id);
+       void *reg_addr;
+       int rc;
 
-       if (rtype == KVM_REG_RISCV_VECTOR &&
-           riscv_isa_extension_available(isa, v)) {
-               reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
-       }
+       if (!riscv_isa_extension_available(isa, v))
+               return -ENOENT;
 
-       if (!reg_val)
-               return -EINVAL;
+       rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
+       if (rc)
+               return rc;
 
-       if (copy_to_user(uaddr, reg_val, reg_size))
+       if (copy_to_user(uaddr, reg_addr, reg_size))
                return -EFAULT;
 
        return 0;
 }
 
 int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
-                                 const struct kvm_one_reg *reg,
-                                 unsigned long rtype)
+                                 const struct kvm_one_reg *reg)
 {
        unsigned long *isa = vcpu->arch.isa;
        unsigned long __user *uaddr =
                        (unsigned long __user *)(unsigned long)reg->addr;
        unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
                                            KVM_REG_SIZE_MASK |
-                                           rtype);
-       void *reg_val = NULL;
+                                           KVM_REG_RISCV_VECTOR);
        size_t reg_size = KVM_REG_SIZE(reg->id);
+       void *reg_addr;
+       int rc;
 
-       if (rtype == KVM_REG_RISCV_VECTOR &&
-           riscv_isa_extension_available(isa, v)) {
-               reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
-       }
+       if (!riscv_isa_extension_available(isa, v))
+               return -ENOENT;
 
-       if (!reg_val)
-               return -EINVAL;
+       rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, &reg_addr);
+       if (rc)
+               return rc;
 
-       if (copy_from_user(reg_val, uaddr, reg_size))
+       if (copy_from_user(reg_addr, uaddr, reg_size))
                return -EFAULT;
 
        return 0;
index 7270b4d..b76e7e1 100644 (file)
@@ -9,26 +9,93 @@
 #include <linux/dma-map-ops.h>
 #include <linux/mm.h>
 #include <asm/cacheflush.h>
+#include <asm/dma-noncoherent.h>
 
 static bool noncoherent_supported __ro_after_init;
 int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN;
 EXPORT_SYMBOL_GPL(dma_cache_alignment);
 
-void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-                             enum dma_data_direction dir)
+/*
+ * Optional non-standard cache maintenance hooks. Every hook starts out
+ * NULL; riscv_noncoherent_register_cache_ops() overwrites the whole
+ * structure with the caller's set.
+ */
+struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init = {
+       .wback          = NULL,
+       .inv            = NULL,
+       .wback_inv      = NULL,
+};
+
+/*
+ * arch_dma_cache_wback - write back the cache lines covering a DMA buffer
+ * @paddr: physical start address of the buffer
+ * @size:  length of the buffer in bytes
+ *
+ * With CONFIG_RISCV_NONSTANDARD_CACHE_OPS, a registered .wback hook is
+ * called with the physical address and nothing else is done. Otherwise the
+ * "clean" CMO is issued on the buffer's kernel virtual address.
+ */
+static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
+{
+       void *vaddr = phys_to_virt(paddr);
+
+#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
+       /* A registered hook takes over from the CMO path. */
+       if (unlikely(noncoherent_cache_ops.wback != NULL)) {
+               noncoherent_cache_ops.wback(paddr, size);
+               return;
+       }
+#endif
+
+       ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
+}
+
+/*
+ * arch_dma_cache_inv - invalidate the cache lines covering a DMA buffer
+ * @paddr: physical start address of the buffer
+ * @size:  length of the buffer in bytes
+ *
+ * With CONFIG_RISCV_NONSTANDARD_CACHE_OPS, a registered .inv hook is
+ * called with the physical address and nothing else is done. Otherwise the
+ * "inval" CMO is issued on the buffer's kernel virtual address.
+ */
+static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+       void *vaddr = phys_to_virt(paddr);
+
+#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
+       /* A registered hook takes over from the CMO path. */
+       if (unlikely(noncoherent_cache_ops.inv != NULL)) {
+               noncoherent_cache_ops.inv(paddr, size);
+               return;
+       }
+#endif
+
+       ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
+}
+
+static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
 {
        void *vaddr = phys_to_virt(paddr);
 
+#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
+       if (unlikely(noncoherent_cache_ops.wback_inv)) {
+               noncoherent_cache_ops.wback_inv(paddr, size);
+               return;
+       }
+#endif
+
+       ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+}
+
+/*
+ * Policy switch read by arch_sync_dma_for_device() for DMA_FROM_DEVICE:
+ * when it is true the buffer takes the write-back path shared with
+ * DMA_BIDIRECTIONAL instead of a plain invalidate. Always true here.
+ */
+static inline bool arch_sync_dma_clean_before_fromdevice(void)
+{
+       return true;
+}
+
+/*
+ * Policy switch read by arch_sync_dma_for_device() and
+ * arch_sync_dma_for_cpu(): when it is true the invalidate is performed
+ * after the transfer, in arch_sync_dma_for_cpu(). Always true here.
+ */
+static inline bool arch_sync_dma_cpu_needs_post_dma_flush(void)
+{
+       return true;
+}
+
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+                             enum dma_data_direction dir)
+{
        switch (dir) {
        case DMA_TO_DEVICE:
-               ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
+               arch_dma_cache_wback(paddr, size);
                break;
+
        case DMA_FROM_DEVICE:
-               ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
-               break;
+               if (!arch_sync_dma_clean_before_fromdevice()) {
+                       arch_dma_cache_inv(paddr, size);
+                       break;
+               }
+               fallthrough;
+
        case DMA_BIDIRECTIONAL:
-               ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+               /* Skip the invalidate here if it's done later */
+               if (IS_ENABLED(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) &&
+                   arch_sync_dma_cpu_needs_post_dma_flush())
+                       arch_dma_cache_wback(paddr, size);
+               else
+                       arch_dma_cache_wback_inv(paddr, size);
                break;
+
        default:
                break;
        }
@@ -37,15 +104,17 @@ void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
 void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
                           enum dma_data_direction dir)
 {
-       void *vaddr = phys_to_virt(paddr);
-
        switch (dir) {
        case DMA_TO_DEVICE:
                break;
+
        case DMA_FROM_DEVICE:
        case DMA_BIDIRECTIONAL:
-               ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+               /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */
+               if (arch_sync_dma_cpu_needs_post_dma_flush())
+                       arch_dma_cache_inv(paddr, size);
                break;
+
        default:
                break;
        }
@@ -55,6 +124,13 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 {
        void *flush_addr = page_address(page);
 
+#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
+       if (unlikely(noncoherent_cache_ops.wback_inv)) {
+               noncoherent_cache_ops.wback_inv(page_to_phys(page), size);
+               return;
+       }
+#endif
+
        ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size);
 }
 
@@ -86,3 +162,12 @@ void __init riscv_set_dma_cache_alignment(void)
        if (!noncoherent_supported)
                dma_cache_alignment = 1;
 }
+
+/*
+ * riscv_noncoherent_register_cache_ops - install non-standard cache hooks
+ * @ops: hook set to copy into noncoherent_cache_ops; NULL is ignored
+ *
+ * The structure is copied by value, so @ops need not outlive the call.
+ * NOTE(review): noncoherent_cache_ops is __ro_after_init, so this
+ * presumably has to run during init - confirm against the callers.
+ */
+void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops)
+{
+       if (ops)
+               noncoherent_cache_ops = *ops;
+}
+EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops);
index 943c18d..0798bd8 100644 (file)
@@ -1014,11 +1014,45 @@ static void __init pt_ops_set_late(void)
 #endif
 }
 
+#ifdef CONFIG_RANDOMIZE_BASE
+extern bool __init __pi_set_nokaslr_from_cmdline(uintptr_t dtb_pa);
+extern u64 __init __pi_get_kaslr_seed(uintptr_t dtb_pa);
+
+/*
+ * print_nokaslr - early_param handler for "nokaslr"
+ * @p: parameter value (unused)
+ *
+ * Only logs that KASLR is disabled; setup_vm() checks the option
+ * separately through __pi_set_nokaslr_from_cmdline(). Always returns 0.
+ */
+static int __init print_nokaslr(char *p)
+{
+       /* Terminate the message so the log line is complete on its own. */
+       pr_info("Disabled KASLR\n");
+       return 0;
+}
+early_param("nokaslr", print_nokaslr);
+
+/*
+ * kaslr_offset - report the offset stored in kernel_map.virt_offset,
+ * which setup_vm() adds to KERNEL_LINK_ADDR.
+ */
+unsigned long kaslr_offset(void)
+{
+       return kernel_map.virt_offset;
+}
+#endif
+
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 {
        pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
 
-       kernel_map.virt_addr = KERNEL_LINK_ADDR;
+#ifdef CONFIG_RANDOMIZE_BASE
+       if (!__pi_set_nokaslr_from_cmdline(dtb_pa)) {
+               u64 kaslr_seed = __pi_get_kaslr_seed(dtb_pa);
+               u32 kernel_size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
+               u32 nr_pos;
+
+               /*
+                * Compute the number of positions available: we are limited
+                * by the early page table that only has one PUD and we must
+                * be aligned on PMD_SIZE.
+                */
+               nr_pos = (PUD_SIZE - kernel_size) / PMD_SIZE;
+
+               kernel_map.virt_offset = (kaslr_seed % nr_pos) * PMD_SIZE;
+       }
+#endif
+
+       kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;
        kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
 
 #ifdef CONFIG_XIP_KERNEL
index 089df92..c5fc5ec 100644 (file)
@@ -7,15 +7,28 @@
 #include <linux/libnvdimm.h>
 
 #include <asm/cacheflush.h>
+#include <asm/dma-noncoherent.h>
 
 void arch_wb_cache_pmem(void *addr, size_t size)
 {
+#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
+       if (unlikely(noncoherent_cache_ops.wback)) {
+               noncoherent_cache_ops.wback(virt_to_phys(addr), size);
+               return;
+       }
+#endif
        ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size);
 }
 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
+#ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
+       if (unlikely(noncoherent_cache_ops.inv)) {
+               noncoherent_cache_ops.inv(virt_to_phys(addr), size);
+               return;
+       }
+#endif
        ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
index d21c6c9..a5ce1ab 100644 (file)
@@ -68,6 +68,7 @@ static inline bool is_creg(u8 reg)
 struct rv_jit_context {
        struct bpf_prog *prog;
        u16 *insns;             /* RV insns */
+       u16 *ro_insns;
        int ninsns;
        int prologue_len;
        int epilogue_offset;
@@ -85,7 +86,9 @@ static inline int ninsns_rvoff(int ninsns)
 
 struct rv_jit_data {
        struct bpf_binary_header *header;
+       struct bpf_binary_header *ro_header;
        u8 *image;
+       u8 *ro_image;
        struct rv_jit_context ctx;
 };
 
index 8423f4d..ecd3ae6 100644 (file)
@@ -144,7 +144,11 @@ static bool in_auipc_jalr_range(s64 val)
 /* Emit fixed-length instructions for address */
 static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
 {
-       u64 ip = (u64)(ctx->insns + ctx->ninsns);
+       /*
+        * Use the ro_insns(RX) to calculate the offset as the BPF program will
+        * finally run from this memory region.
+        */
+       u64 ip = (u64)(ctx->ro_insns + ctx->ninsns);
        s64 off = addr - ip;
        s64 upper = (off + (1 << 11)) >> 12;
        s64 lower = off & 0xfff;
@@ -464,8 +468,12 @@ static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
        s64 off = 0;
        u64 ip;
 
-       if (addr && ctx->insns) {
-               ip = (u64)(long)(ctx->insns + ctx->ninsns);
+       if (addr && ctx->insns && ctx->ro_insns) {
+               /*
+                * Use the ro_insns(RX) to calculate the offset as the BPF
+                * program will finally run from this memory region.
+                */
+               ip = (u64)(long)(ctx->ro_insns + ctx->ninsns);
                off = addr - ip;
        }
 
@@ -578,9 +586,10 @@ static int add_exception_handler(const struct bpf_insn *insn,
 {
        struct exception_table_entry *ex;
        unsigned long pc;
-       off_t offset;
+       off_t ins_offset;
+       off_t fixup_offset;
 
-       if (!ctx->insns || !ctx->prog->aux->extable ||
+       if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
            (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX))
                return 0;
 
@@ -594,12 +603,17 @@ static int add_exception_handler(const struct bpf_insn *insn,
                return -EINVAL;
 
        ex = &ctx->prog->aux->extable[ctx->nexentries];
-       pc = (unsigned long)&ctx->insns[ctx->ninsns - insn_len];
+       pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len];
 
-       offset = pc - (long)&ex->insn;
-       if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+       /*
+        * This is the relative offset of the instruction that may fault from
+        * the exception table itself. This will be written to the exception
+        * table and if this instruction faults, the destination register will
+        * be set to '0' and the execution will jump to the next instruction.
+        */
+       ins_offset = pc - (long)&ex->insn;
+       if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
                return -ERANGE;
-       ex->insn = offset;
 
        /*
         * Since the extable follows the program, the fixup offset is always
@@ -608,12 +622,25 @@ static int add_exception_handler(const struct bpf_insn *insn,
         * bits. We don't need to worry about buildtime or runtime sort
         * modifying the upper bits because the table is already sorted, and
         * isn't part of the main exception table.
+        *
+        * The fixup_offset is set to the next instruction from the instruction
+        * that may fault. The execution will jump to this after handling the
+        * fault.
         */
-       offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
-       if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+       fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
+       if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
                return -ERANGE;
 
-       ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
+       /*
+        * The offsets above have been calculated using the RO buffer but we
+        * need to use the R/W buffer for writes.
+        * switch ex to rw buffer for writing.
+        */
+       ex = (void *)ctx->insns + ((void *)ex - (void *)ctx->ro_insns);
+
+       ex->insn = ins_offset;
+
+       ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
                FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
        ex->type = EX_TYPE_BPF;
 
@@ -1007,6 +1034,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
 
        ctx.ninsns = 0;
        ctx.insns = NULL;
+       ctx.ro_insns = NULL;
        ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
        if (ret < 0)
                return ret;
@@ -1015,7 +1043,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
                return -EFBIG;
 
        ctx.ninsns = 0;
+       /*
+        * The bpf_int_jit_compile() uses a RW buffer (ctx.insns) to write the
+        * JITed instructions and later copies it to a RX region (ctx.ro_insns).
+        * It also uses ctx.ro_insns to calculate offsets for jumps etc. As the
+        * trampoline image uses the same memory area for writing and execution,
+        * both ctx.insns and ctx.ro_insns can be set to image.
+        */
        ctx.insns = image;
+       ctx.ro_insns = image;
        ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
        if (ret < 0)
                return ret;
index 7a26a3e..7b70ccb 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/memory.h>
+#include <asm/patch.h>
 #include "bpf_jit.h"
 
 /* Number of iterations to try until offsets converge. */
@@ -117,16 +119,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                                sizeof(struct exception_table_entry);
                        prog_size = sizeof(*ctx->insns) * ctx->ninsns;
 
-                       jit_data->header =
-                               bpf_jit_binary_alloc(prog_size + extable_size,
-                                                    &jit_data->image,
-                                                    sizeof(u32),
-                                                    bpf_fill_ill_insns);
-                       if (!jit_data->header) {
+                       jit_data->ro_header =
+                               bpf_jit_binary_pack_alloc(prog_size + extable_size,
+                                                         &jit_data->ro_image, sizeof(u32),
+                                                         &jit_data->header, &jit_data->image,
+                                                         bpf_fill_ill_insns);
+                       if (!jit_data->ro_header) {
                                prog = orig_prog;
                                goto out_offset;
                        }
 
+                       /*
+                        * Use the image(RW) for writing the JITed instructions. But also save
+                        * the ro_image(RX) for calculating the offsets in the image. The RW
+                        * image will be later copied to the RX image from where the program
+                        * will run. The bpf_jit_binary_pack_finalize() will do this copy in the
+                        * final step.
+                        */
+                       ctx->ro_insns = (u16 *)jit_data->ro_image;
                        ctx->insns = (u16 *)jit_data->image;
                        /*
                         * Now, when the image is allocated, the image can
@@ -138,14 +148,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
        if (i == NR_JIT_ITERATIONS) {
                pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
-               if (jit_data->header)
-                       bpf_jit_binary_free(jit_data->header);
                prog = orig_prog;
-               goto out_offset;
+               goto out_free_hdr;
        }
 
        if (extable_size)
-               prog->aux->extable = (void *)ctx->insns + prog_size;
+               prog->aux->extable = (void *)ctx->ro_insns + prog_size;
 
 skip_init_ctx:
        pass++;
@@ -154,23 +162,33 @@ skip_init_ctx:
 
        bpf_jit_build_prologue(ctx);
        if (build_body(ctx, extra_pass, NULL)) {
-               bpf_jit_binary_free(jit_data->header);
                prog = orig_prog;
-               goto out_offset;
+               goto out_free_hdr;
        }
        bpf_jit_build_epilogue(ctx);
 
        if (bpf_jit_enable > 1)
                bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
 
-       prog->bpf_func = (void *)ctx->insns;
+       prog->bpf_func = (void *)ctx->ro_insns;
        prog->jited = 1;
        prog->jited_len = prog_size;
 
-       bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
-
        if (!prog->is_func || extra_pass) {
-               bpf_jit_binary_lock_ro(jit_data->header);
+               if (WARN_ON(bpf_jit_binary_pack_finalize(prog, jit_data->ro_header,
+                                                        jit_data->header))) {
+                       /* ro_header has been freed */
+                       jit_data->ro_header = NULL;
+                       prog = orig_prog;
+                       goto out_offset;
+               }
+               /*
+                * The instructions have now been copied to the ROX region from
+                * where they will execute.
+                * Write any modified data cache blocks out to memory and
+                * invalidate the corresponding blocks in the instruction cache.
+                */
+               bpf_flush_icache(jit_data->ro_header, ctx->ro_insns + ctx->ninsns);
                for (i = 0; i < prog->len; i++)
                        ctx->offset[i] = ninsns_rvoff(ctx->offset[i]);
                bpf_prog_fill_jited_linfo(prog, ctx->offset);
@@ -185,6 +203,14 @@ out:
                bpf_jit_prog_release_other(prog, prog == orig_prog ?
                                           tmp : orig_prog);
        return prog;
+
+out_free_hdr:
+       if (jit_data->header) {
+               bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
+                                  sizeof(jit_data->header->size));
+               bpf_jit_binary_pack_free(jit_data->ro_header, jit_data->header);
+       }
+       goto out_offset;
 }
 
 u64 bpf_jit_alloc_exec_limit(void)
@@ -204,3 +230,51 @@ void bpf_jit_free_exec(void *addr)
 {
        return vfree(addr);
 }
+
+/*
+ * bpf_arch_text_copy - copy @len bytes from @src to @dst via text patching
+ *
+ * patch_text_nosync() is called with text_mutex held.
+ *
+ * Returns @dst on success, or ERR_PTR(-EINVAL) when patch_text_nosync()
+ * returns non-zero.
+ */
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+       int err;
+
+       mutex_lock(&text_mutex);
+       err = patch_text_nosync(dst, src, len);
+       mutex_unlock(&text_mutex);
+
+       return err ? ERR_PTR(-EINVAL) : dst;
+}
+
+/*
+ * bpf_arch_text_invalidate - overwrite @len bytes of text at @dst
+ *
+ * Calls patch_text_set_nosync(dst, 0, len) with text_mutex held and
+ * returns its result unchanged (presumably this fills the range with
+ * zero bytes - confirm against patch_text_set_nosync()).
+ */
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+       int err;
+
+       mutex_lock(&text_mutex);
+       err = patch_text_set_nosync(dst, 0, len);
+       mutex_unlock(&text_mutex);
+
+       return err;
+}
+
+/*
+ * bpf_jit_free - release a BPF program and, if it was JITed, its image
+ * @prog: program to free
+ *
+ * For a JITed program, leftover jit_data is finalized and freed first,
+ * then the packed binary header is released. bpf_prog_unlock_free() runs
+ * in every case.
+ */
+void bpf_jit_free(struct bpf_prog *prog)
+{
+       struct rv_jit_data *jit_data;
+       struct bpf_binary_header *hdr;
+
+       if (!prog->jited)
+               goto free_prog;
+
+       /*
+        * If we fail the final pass of JIT (from jit_subprogs), the program
+        * may not be finalized yet. Call finalize here before freeing it.
+        */
+       jit_data = prog->aux->jit_data;
+       if (jit_data) {
+               bpf_jit_binary_pack_finalize(prog, jit_data->ro_header,
+                                            jit_data->header);
+               kfree(jit_data);
+       }
+
+       hdr = bpf_jit_binary_pack_hdr(prog);
+       bpf_jit_binary_pack_free(hdr, NULL);
+       WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
+
+free_prog:
+       bpf_prog_unlock_free(prog);
+}
index 8753cb0..7b75217 100644 (file)
@@ -19,7 +19,6 @@ struct parmarea parmarea __section(".parmarea") = {
 };
 
 char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
-int __bootdata(noexec_disabled);
 
 unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
 struct ipl_parameter_block __bootdata_preserved(ipl_block);
@@ -290,12 +289,6 @@ void parse_boot_command_line(void)
                                zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
                }
 
-               if (!strcmp(param, "noexec")) {
-                       rc = kstrtobool(val, &enabled);
-                       if (!rc && !enabled)
-                               noexec_disabled = 1;
-               }
-
                if (!strcmp(param, "facilities") && val)
                        modify_fac_list(val);
 
index b9681cb..d3e48bd 100644 (file)
@@ -53,10 +53,8 @@ static void detect_facilities(void)
        }
        if (test_facility(78))
                machine.has_edat2 = 1;
-       if (!noexec_disabled && test_facility(130)) {
+       if (test_facility(130))
                machine.has_nx = 1;
-               __ctl_set_bit(0, 20);
-       }
 }
 
 static void setup_lpp(void)
index c67f59d..01257ce 100644 (file)
@@ -287,7 +287,9 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e
                        if (kasan_pte_populate_zero_shadow(pte, mode))
                                continue;
                        entry = __pte(_pa(addr, PAGE_SIZE, mode));
-                       entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+                       entry = set_pte_bit(entry, PAGE_KERNEL);
+                       if (!machine.has_nx)
+                               entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
                        set_pte(pte, entry);
                        pages++;
                }
@@ -311,7 +313,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
                                continue;
                        if (can_large_pmd(pmd, addr, next)) {
                                entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
-                               entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+                               entry = set_pmd_bit(entry, SEGMENT_KERNEL);
+                               if (!machine.has_nx)
+                                       entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
                                set_pmd(pmd, entry);
                                pages++;
                                continue;
@@ -342,7 +346,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
                                continue;
                        if (can_large_pud(pud, addr, next)) {
                                entry = __pud(_pa(addr, _REGION3_SIZE, mode));
-                               entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+                               entry = set_pud_bit(entry, REGION3_KERNEL);
+                               if (!machine.has_nx)
+                                       entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
                                set_pud(pud, entry);
                                pages++;
                                continue;
index 39227b4..eb7f84f 100644 (file)
@@ -1 +1,2 @@
+# Help: Enable BTF debug info
 CONFIG_DEBUG_INFO_BTF=y
index 700a8b2..84c2b55 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Enable KASan for debugging
 CONFIG_KASAN=y
 CONFIG_KASAN_INLINE=y
 CONFIG_KASAN_VMALLOC=y
index e82e562..c4c28c2 100644 (file)
@@ -18,7 +18,6 @@ struct airq_struct {
        struct hlist_node list;         /* Handler queueing. */
        void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
        u8 *lsi_ptr;                    /* Local-Summary-Indicator pointer */
-       u8 lsi_mask;                    /* Local-Summary-Indicator mask */
        u8 isc;                         /* Interrupt-subclass */
        u8 flags;
 };
index c260adb..7fe3e31 100644 (file)
@@ -9,6 +9,6 @@
  * to DMA. It _is_ used for the s390 memory zone split at 2GB caused
  * by the 31 bit heritage.
  */
-#define MAX_DMA_ADDRESS         0x80000000
+#define MAX_DMA_ADDRESS                __va(0x80000000)
 
 #endif /* _ASM_S390_DMA_H */
index 91bfecb..427f952 100644 (file)
@@ -817,6 +817,8 @@ struct kvm_s390_cpu_model {
        __u64 *fac_list;
        u64 cpuid;
        unsigned short ibc;
+       /* subset of available UV-features for pv-guests enabled by user space */
+       struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
 };
 
 typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
index 3fecaa4..0486e6e 100644 (file)
@@ -23,7 +23,7 @@
  */
 #define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
 
-extern unsigned long __samode31, __eamode31;
-extern unsigned long __stext_amode31, __etext_amode31;
+extern char *__samode31, *__eamode31;
+extern char *__stext_amode31, *__etext_amode31;
 
 #endif
index 7a3eefd..06fbabe 100644 (file)
@@ -24,43 +24,41 @@ enum {
 #define SET_MEMORY_INV BIT(_SET_MEMORY_INV_BIT)
 #define SET_MEMORY_DEF BIT(_SET_MEMORY_DEF_BIT)
 
-int __set_memory(unsigned long addr, int numpages, unsigned long flags);
-
-static inline int set_memory_ro(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_RO);
-}
-
-static inline int set_memory_rw(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_RW);
-}
-
-static inline int set_memory_nx(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_NX);
-}
-
-static inline int set_memory_x(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_X);
-}
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags);
 
 #define set_memory_rox set_memory_rox
-static inline int set_memory_rox(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_RO | SET_MEMORY_X);
-}
 
-static inline int set_memory_rwnx(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_RW | SET_MEMORY_NX);
+/*
+ * Generate two variants of each set_memory() function:
+ *
+ * set_memory_yy(unsigned long addr, int numpages);
+ * __set_memory_yy(void *start, void *end);
+ *
+ * The second variant exists for both convenience to avoid the usual
+ * (unsigned long) casts, but unlike the first variant it can also be used
+ * for areas larger than 8TB, which may happen at memory initialization.
+ */
+#define __SET_MEMORY_FUNC(fname, flags)                                        \
+static inline int fname(unsigned long addr, int numpages)              \
+{                                                                      \
+       return __set_memory(addr, numpages, (flags));                   \
+}                                                                      \
+                                                                       \
+static inline int __##fname(void *start, void *end)                    \
+{                                                                      \
+       unsigned long numpages;                                         \
+                                                                       \
+       numpages = (end - start) >> PAGE_SHIFT;                         \
+       return __set_memory((unsigned long)start, numpages, (flags));   \
 }
 
-static inline int set_memory_4k(unsigned long addr, int numpages)
-{
-       return __set_memory(addr, numpages, SET_MEMORY_4K);
-}
+__SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO)
+__SET_MEMORY_FUNC(set_memory_rw, SET_MEMORY_RW)
+__SET_MEMORY_FUNC(set_memory_nx, SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_x, SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rox, SET_MEMORY_RO | SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rwnx, SET_MEMORY_RW | SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
 
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
index b30fe91..25cadc2 100644 (file)
@@ -72,7 +72,6 @@ extern unsigned int zlib_dfltcc_support;
 #define ZLIB_DFLTCC_INFLATE_ONLY       3
 #define ZLIB_DFLTCC_FULL_DEBUG         4
 
-extern int noexec_disabled;
 extern unsigned long ident_map_size;
 extern unsigned long max_mappable;
 
index d2cd42b..0e7bd38 100644 (file)
@@ -99,6 +99,8 @@ enum uv_cmds_inst {
 enum uv_feat_ind {
        BIT_UV_FEAT_MISC = 0,
        BIT_UV_FEAT_AIV = 1,
+       BIT_UV_FEAT_AP = 4,
+       BIT_UV_FEAT_AP_INTR = 5,
 };
 
 struct uv_cb_header {
@@ -159,7 +161,15 @@ struct uv_cb_cgc {
        u64 guest_handle;
        u64 conf_base_stor_origin;
        u64 conf_virt_stor_origin;
-       u64 reserved30;
+       u8  reserved30[6];
+       union {
+               struct {
+                       u16 : 14;
+                       u16 ap_instr_intr : 1;
+                       u16 ap_allow_instr : 1;
+               };
+               u16 raw;
+       } flags;
        u64 guest_stor_origin;
        u64 guest_stor_len;
        u64 guest_sca;
@@ -397,6 +407,13 @@ struct uv_info {
 
 extern struct uv_info uv_info;
 
+static inline bool uv_has_feature(u8 feature_bit)
+{
+       if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
+               return false;
+       return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
+}
+
 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 extern int prot_virt_guest;
 
index a73cf01..abe926d 100644 (file)
@@ -159,6 +159,22 @@ struct kvm_s390_vm_cpu_subfunc {
        __u8 reserved[1728];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST        6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST  7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS        64
+struct kvm_s390_vm_cpu_uv_feat {
+       union {
+               struct {
+                       __u64 : 4;
+                       __u64 ap : 1;           /* bit 4 */
+                       __u64 ap_intr : 1;      /* bit 5 */
+                       __u64 : 58;
+               };
+               __u64 feat;
+       };
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW       0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW       1
index 2dd5976..442ce04 100644 (file)
@@ -44,7 +44,6 @@ early_param(#param, ignore_decompressor_param_##param)
 decompressor_handled_param(mem);
 decompressor_handled_param(vmalloc);
 decompressor_handled_param(dfltcc);
-decompressor_handled_param(noexec);
 decompressor_handled_param(facilities);
 decompressor_handled_param(nokaslr);
 #if IS_ENABLED(CONFIG_KVM)
@@ -233,10 +232,8 @@ static __init void detect_machine_facilities(void)
                S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
                __ctl_set_bit(0, 17);
        }
-       if (test_facility(130) && !noexec_disabled) {
+       if (test_facility(130))
                S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
-               __ctl_set_bit(0, 20);
-       }
        if (test_facility(133))
                S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
        if (test_facility(139) && (tod_clock_base.tod >> 63)) {
index 12a2bd4..ce65fc0 100644 (file)
@@ -216,8 +216,8 @@ void arch_crash_save_vmcoreinfo(void)
        VMCOREINFO_SYMBOL(lowcore_ptr);
        VMCOREINFO_SYMBOL(high_memory);
        VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
-       vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
-       vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
+       vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+       vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
        vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
        abs_lc = get_abs_lowcore();
        abs_lc->vmcore_info = paddr_vmcoreinfo_note();
index c744104..de6ad0f 100644 (file)
@@ -97,10 +97,10 @@ EXPORT_SYMBOL(console_irq);
  * relocated above 2 GB, because it has to use 31 bit addresses.
  * Such code and data is part of the .amode31 section.
  */
-unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
-unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
-unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
-unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
+char __amode31_ref *__samode31 = _samode31;
+char __amode31_ref *__eamode31 = _eamode31;
+char __amode31_ref *__stext_amode31 = _stext_amode31;
+char __amode31_ref *__etext_amode31 = _etext_amode31;
 struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
 struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
 
@@ -145,7 +145,6 @@ static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
 static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
 static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
 
-int __bootdata(noexec_disabled);
 unsigned long __bootdata_preserved(max_mappable);
 unsigned long __bootdata(ident_map_size);
 struct physmem_info __bootdata(physmem_info);
@@ -771,15 +770,15 @@ static void __init setup_memory(void)
 static void __init relocate_amode31_section(void)
 {
        unsigned long amode31_size = __eamode31 - __samode31;
-       long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31;
-       long *ptr;
+       long amode31_offset, *ptr;
 
+       amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31;
        pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
 
        /* Move original AMODE31 section to the new one */
-       memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size);
+       memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
        /* Zero out the old AMODE31 section to catch invalid accesses within it */
-       memset((void *)__samode31, 0, amode31_size);
+       memset(__samode31, 0, amode31_size);
 
        /* Update all AMODE31 region references */
        for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
index b771f1b..fc07bc3 100644 (file)
@@ -258,7 +258,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
         * shared page from a different protected VM will automatically also
         * transfer its ownership.
         */
-       if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
+       if (uv_has_feature(BIT_UV_FEAT_MISC))
                return false;
        if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
                return false;
index 341abaf..b163520 100644 (file)
@@ -228,6 +228,21 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
 
 #define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
 
+static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
+{
+       if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
+               return false;
+       if (guestdbg_sstep_enabled(vcpu) &&
+           vcpu->arch.sie_block->iprcc != PGM_PER) {
+               /*
+                * __vcpu_run() will exit after delivering the concurrently
+                * indicated condition.
+                */
+               return false;
+       }
+       return true;
+}
+
 static int handle_prog(struct kvm_vcpu *vcpu)
 {
        psw_t psw;
@@ -242,7 +257,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
        if (kvm_s390_pv_cpu_is_protected(vcpu))
                return -EOPNOTSUPP;
 
-       if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+       if (should_handle_per_event(vcpu)) {
                rc = kvm_s390_handle_per_event(vcpu);
                if (rc)
                        return rc;
@@ -571,6 +586,19 @@ static int handle_pv_notification(struct kvm_vcpu *vcpu)
        return handle_instruction(vcpu);
 }
 
+static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
+{
+       /* Process PER, also if the instruction is processed in user space. */
+       if (!(vcpu->arch.sie_block->icptstatus & 0x02))
+               return false;
+       if (rc != 0 && rc != -EOPNOTSUPP)
+               return false;
+       if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
+               /* __vcpu_run() will exit after delivering the interrupt. */
+               return false;
+       return true;
+}
+
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
        int rc, per_rc = 0;
@@ -605,8 +633,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
                rc = handle_partial_execution(vcpu);
                break;
        case ICPT_KSS:
-               rc = kvm_s390_skey_check_enable(vcpu);
-               break;
+               /* Instruction will be redriven, skip the PER check. */
+               return kvm_s390_skey_check_enable(vcpu);
        case ICPT_MCHKREQ:
        case ICPT_INT_ENABLE:
                /*
@@ -633,9 +661,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
                return -EOPNOTSUPP;
        }
 
-       /* process PER, also if the instruction is processed in user space */
-       if (vcpu->arch.sie_block->icptstatus & 0x02 &&
-           (!rc || rc == -EOPNOTSUPP))
+       if (should_handle_per_ifetch(vcpu, rc))
                per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
        return per_rc ? per_rc : rc;
 }
index 9bd0a87..c1b47d6 100644 (file)
@@ -1392,6 +1392,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        int rc = 0;
+       bool delivered = false;
        unsigned long irq_type;
        unsigned long irqs;
 
@@ -1465,6 +1466,19 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
                        WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
                        clear_bit(irq_type, &li->pending_irqs);
                }
+               delivered |= !rc;
+       }
+
+       /*
+        * We delivered at least one interrupt and modified the PC. Force a
+        * singlestep event now.
+        */
+       if (delivered && guestdbg_sstep_enabled(vcpu)) {
+               struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+
+               debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
+               debug_exit->type = KVM_SINGLESTEP;
+               vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
        }
 
        set_intercept_indicators(vcpu);
@@ -3398,7 +3412,6 @@ static void gib_alert_irq_handler(struct airq_struct *airq,
 
 static struct airq_struct gib_alert_irq = {
        .handler = gib_alert_irq_handler,
-       .lsi_ptr = &gib_alert_irq.lsi_mask,
 };
 
 void kvm_s390_gib_destroy(void)
@@ -3438,6 +3451,8 @@ int __init kvm_s390_gib_init(u8 nisc)
                rc = -EIO;
                goto out_free_gib;
        }
+       /* adapter interrupts used for AP (applicable here) don't use the LSI */
+       *gib_alert_irq.lsi_ptr = 0xff;
 
        gib->nisc = nisc;
        gib_origin = virt_to_phys(gib);
index d1e768b..b3f17e0 100644 (file)
@@ -1531,6 +1531,39 @@ static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
        return 0;
 }
 
+#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK     \
+(                                              \
+       ((struct kvm_s390_vm_cpu_uv_feat){      \
+               .ap = 1,                        \
+               .ap_intr = 1,                   \
+       })                                      \
+       .feat                                   \
+)
+
+static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
+       unsigned long data, filter;
+
+       filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+       if (get_user(data, &ptr->feat))
+               return -EFAULT;
+       if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+       if (kvm->created_vcpus) {
+               mutex_unlock(&kvm->lock);
+               return -EBUSY;
+       }
+       kvm->arch.model.uv_feat_guest.feat = data;
+       mutex_unlock(&kvm->lock);
+
+       VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
+
+       return 0;
+}
+
 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
        int ret = -ENXIO;
@@ -1545,6 +1578,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
+       case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+               ret = kvm_s390_set_uv_feat(kvm, attr);
+               break;
        }
        return ret;
 }
@@ -1777,6 +1813,33 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
        return 0;
 }
 
+static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+       unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
+
+       if (put_user(feat, &dst->feat))
+               return -EFAULT;
+       VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+       return 0;
+}
+
+static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+       struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+       unsigned long feat;
+
+       BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
+
+       feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+       if (put_user(feat, &dst->feat))
+               return -EFAULT;
+       VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+       return 0;
+}
+
 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 {
        int ret = -ENXIO;
@@ -1800,6 +1863,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
+       case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+               ret = kvm_s390_get_processor_uv_feat(kvm, attr);
+               break;
+       case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+               ret = kvm_s390_get_machine_uv_feat(kvm, attr);
+               break;
        }
        return ret;
 }
@@ -1952,6 +2021,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+               case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+               case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
                        ret = 0;
                        break;
                default:
@@ -2406,7 +2477,7 @@ static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
        struct kvm_vcpu *vcpu;
 
        /* Disable the GISA if the ultravisor does not support AIV. */
-       if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
+       if (!uv_has_feature(BIT_UV_FEAT_AIV))
                kvm_s390_gisa_disable(kvm);
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -3296,6 +3367,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
        kvm->arch.model.ibc = sclp.ibc & 0x0fff;
 
+       kvm->arch.model.uv_feat_guest.feat = 0;
+
        kvm_s390_crypto_init(kvm);
 
        if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
@@ -4611,7 +4684,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                rc = kvm_s390_deliver_pending_interrupts(vcpu);
-               if (rc)
+               if (rc || guestdbg_exit_pending(vcpu))
                        return rc;
        }
 
@@ -4738,7 +4811,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
        do {
                rc = vcpu_pre_run(vcpu);
-               if (rc)
+               if (rc || guestdbg_exit_pending(vcpu))
                        break;
 
                kvm_vcpu_srcu_read_unlock(vcpu);
@@ -5383,6 +5456,7 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 {
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
+       int rc;
 
        switch (ioctl) {
        case KVM_S390_IRQ: {
@@ -5390,7 +5464,8 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
 
                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
                        return -EFAULT;
-               return kvm_s390_inject_vcpu(vcpu, &s390irq);
+               rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+               break;
        }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
@@ -5400,10 +5475,25 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
                        return -EFAULT;
                if (s390int_to_s390irq(&s390int, &s390irq))
                        return -EINVAL;
-               return kvm_s390_inject_vcpu(vcpu, &s390irq);
+               rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+               break;
        }
+       default:
+               rc = -ENOIOCTLCMD;
+               break;
        }
-       return -ENOIOCTLCMD;
+
+       /*
+        * To simplify single stepping of userspace-emulated instructions,
+        * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
+        * should_handle_per_ifetch()). However, if userspace emulation injects
+        * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
+        * after (and not before) the interrupt delivery.
+        */
+       if (!rc)
+               vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+
+       return rc;
 }
 
 static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
index 8d3f39a..75e81ba 100644 (file)
@@ -285,7 +285,8 @@ static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
        WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
        KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
                     uvcb.header.rc, uvcb.header.rrc);
-       WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+       WARN_ONCE(cc && uvcb.header.rc != 0x104,
+                 "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
                  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
        /* Intended memory leak on "impossible" error */
        if (!cc)
@@ -575,12 +576,14 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
        uvcb.conf_base_stor_origin =
                virt_to_phys((void *)kvm->arch.pv.stor_base);
        uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+       uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
+       uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
 
        cc = uv_call_sched(0, (u64)&uvcb);
        *rc = uvcb.header.rc;
        *rrc = uvcb.header.rrc;
-       KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
-                    uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+       KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
+                    uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
 
        /* Outputs */
        kvm->arch.pv.handle = uvcb.guest_handle;
index afa5db7..b516669 100644 (file)
@@ -290,8 +290,8 @@ static int pt_dump_init(void)
        max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
        max_addr = 1UL << (max_addr * 11 + 31);
        address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
-       address_markers[AMODE31_START_NR].start_address = __samode31;
-       address_markers[AMODE31_END_NR].start_address = __eamode31;
+       address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31;
+       address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31;
        address_markers[MODULES_NR].start_address = MODULES_VADDR;
        address_markers[MODULES_END_NR].start_address = MODULES_END;
        address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
index 099c482..b678295 100644 (file)
@@ -598,7 +598,7 @@ void do_secure_storage_access(struct pt_regs *regs)
         * reliable without the misc UV feature so we need to check
         * for that as well.
         */
-       if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+       if (uv_has_feature(BIT_UV_FEAT_MISC) &&
            !test_bit_inv(61, &regs->int_parm_long)) {
                /*
                 * When this happens, userspace did something that it
index 8d94e29..8b94d22 100644 (file)
@@ -98,7 +98,7 @@ void __init paging_init(void)
        sparse_init();
        zone_dma_bits = 31;
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-       max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+       max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
        max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
        free_area_init(max_zone_pfns);
 }
@@ -107,7 +107,7 @@ void mark_rodata_ro(void)
 {
        unsigned long size = __end_ro_after_init - __start_ro_after_init;
 
-       set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
+       __set_memory_ro(__start_ro_after_init, __end_ro_after_init);
        pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
        debug_checkwx();
 }
index e5ec762..b87e96c 100644 (file)
@@ -373,7 +373,7 @@ static int change_page_attr_alias(unsigned long addr, unsigned long end,
        return rc;
 }
 
-int __set_memory(unsigned long addr, int numpages, unsigned long flags)
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
 {
        unsigned long end;
        int rc;
index e44243b..6957d2e 100644 (file)
@@ -5,7 +5,6 @@
 
 #include <linux/memory_hotplug.h>
 #include <linux/memblock.h>
-#include <linux/kasan.h>
 #include <linux/pfn.h>
 #include <linux/mm.h>
 #include <linux/init.h>
@@ -291,14 +290,9 @@ out:
 
 static void try_free_pmd_table(pud_t *pud, unsigned long start)
 {
-       const unsigned long end = start + PUD_SIZE;
        pmd_t *pmd;
        int i;
 
-       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-       if (end > VMALLOC_START)
-               return;
-
        pmd = pmd_offset(pud, start);
        for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
                if (!pmd_none(*pmd))
@@ -363,14 +357,9 @@ out:
 
 static void try_free_pud_table(p4d_t *p4d, unsigned long start)
 {
-       const unsigned long end = start + P4D_SIZE;
        pud_t *pud;
        int i;
 
-       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-       if (end > VMALLOC_START)
-               return;
-
        pud = pud_offset(p4d, start);
        for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
                if (!pud_none(*pud))
@@ -413,14 +402,9 @@ out:
 
 static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
 {
-       const unsigned long end = start + PGDIR_SIZE;
        p4d_t *p4d;
        int i;
 
-       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
-       if (end > VMALLOC_START)
-               return;
-
        p4d = p4d_offset(pgd, start);
        for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
                if (!p4d_none(*p4d))
@@ -440,6 +424,9 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
 
        if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
                return -EINVAL;
+       /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+       if (WARN_ON_ONCE(end > VMALLOC_START))
+               return -EINVAL;
        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);
                pgd = pgd_offset_k(addr);
@@ -650,122 +637,29 @@ void vmem_unmap_4k_page(unsigned long addr)
        mutex_unlock(&vmem_mutex);
 }
 
-static int __init memblock_region_cmp(const void *a, const void *b)
-{
-       const struct memblock_region *r1 = a;
-       const struct memblock_region *r2 = b;
-
-       if (r1->base < r2->base)
-               return -1;
-       if (r1->base > r2->base)
-               return 1;
-       return 0;
-}
-
-static void __init memblock_region_swap(void *a, void *b, int size)
-{
-       swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
-}
-
-#ifdef CONFIG_KASAN
-#define __sha(x)       ((unsigned long)kasan_mem_to_shadow((void *)x))
-
-static inline int set_memory_kasan(unsigned long start, unsigned long end)
-{
-       start = PAGE_ALIGN_DOWN(__sha(start));
-       end = PAGE_ALIGN(__sha(end));
-       return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
-}
-#endif
-
-/*
- * map whole physical memory to virtual memory (identity mapping)
- * we reserve enough space in the vmalloc area for vmemmap to hotplug
- * additional memory segments.
- */
 void __init vmem_map_init(void)
 {
-       struct memblock_region memory_rwx_regions[] = {
-               {
-                       .base   = 0,
-                       .size   = sizeof(struct lowcore),
-                       .flags  = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-                       .nid    = NUMA_NO_NODE,
-#endif
-               },
-               {
-                       .base   = __pa(_stext),
-                       .size   = _etext - _stext,
-                       .flags  = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-                       .nid    = NUMA_NO_NODE,
-#endif
-               },
-               {
-                       .base   = __pa(_sinittext),
-                       .size   = _einittext - _sinittext,
-                       .flags  = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-                       .nid    = NUMA_NO_NODE,
-#endif
-               },
-               {
-                       .base   = __stext_amode31,
-                       .size   = __etext_amode31 - __stext_amode31,
-                       .flags  = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
-                       .nid    = NUMA_NO_NODE,
-#endif
-               },
-       };
-       struct memblock_type memory_rwx = {
-               .regions        = memory_rwx_regions,
-               .cnt            = ARRAY_SIZE(memory_rwx_regions),
-               .max            = ARRAY_SIZE(memory_rwx_regions),
-       };
-       phys_addr_t base, end;
-       u64 i;
-
+       __set_memory_rox(_stext, _etext);
+       __set_memory_ro(_etext, __end_rodata);
+       __set_memory_rox(_sinittext, _einittext);
+       __set_memory_rox(__stext_amode31, __etext_amode31);
        /*
-        * Set RW+NX attribute on all memory, except regions enumerated with
-        * memory_rwx exclude type. These regions need different attributes,
-        * which are enforced afterwards.
-        *
-        * __for_each_mem_range() iterate and exclude types should be sorted.
-        * The relative location of _stext and _sinittext is hardcoded in the
-        * linker script. However a location of __stext_amode31 and the kernel
-        * image itself are chosen dynamically. Thus, sort the exclude type.
+        * If the BEAR-enhancement facility is not installed the first
+        * prefix page is used to return to the previous context with
+        * an LPSWE instruction and therefore must be executable.
         */
-       sort(&memory_rwx_regions,
-            ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
-            memblock_region_cmp, memblock_region_swap);
-       __for_each_mem_range(i, &memblock.memory, &memory_rwx,
-                            NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
-               set_memory_rwnx((unsigned long)__va(base),
-                               (end - base) >> PAGE_SHIFT);
+       if (!static_key_enabled(&cpu_has_bear))
+               set_memory_x(0, 1);
+       if (debug_pagealloc_enabled()) {
+               /*
+                * Use RELOC_HIDE() as long as __va(0) translates to NULL,
+                * since performing pointer arithmetic on a NULL pointer
+                * has undefined behavior and generates compiler warnings.
+                */
+               __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
        }
-
-#ifdef CONFIG_KASAN
-       for_each_mem_range(i, &base, &end)
-               set_memory_kasan(base, end);
-#endif
-       set_memory_rox((unsigned long)_stext,
-                      (unsigned long)(_etext - _stext) >> PAGE_SHIFT);
-       set_memory_ro((unsigned long)_etext,
-                     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT);
-       set_memory_rox((unsigned long)_sinittext,
-                      (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT);
-       set_memory_rox(__stext_amode31,
-                      (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT);
-
-       /* lowcore must be executable for LPSWE */
-       if (static_key_enabled(&cpu_has_bear))
-               set_memory_nx(0, 1);
-       set_memory_nx(PAGE_SIZE, 1);
-       if (debug_pagealloc_enabled())
-               set_memory_4k(0, ident_map_size >> PAGE_SHIFT);
-
+       if (MACHINE_HAS_NX)
+               ctl_set_bit(0, 20);
        pr_info("Write protected kernel read-only data: %luk\n",
                (unsigned long)(__end_rodata - _stext) >> 10);
 }
index 5e9371f..de2fb12 100644 (file)
@@ -2088,6 +2088,7 @@ struct bpf_tramp_jit {
                                 */
        int r14_off;            /* Offset of saved %r14 */
        int run_ctx_off;        /* Offset of struct bpf_tramp_run_ctx */
+       int tccnt_off;          /* Offset of saved tailcall counter */
        int do_fexit;           /* do_fexit: label */
 };
 
@@ -2258,12 +2259,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
        tjit->r14_off = alloc_stack(tjit, sizeof(u64));
        tjit->run_ctx_off = alloc_stack(tjit,
                                        sizeof(struct bpf_tramp_run_ctx));
+       tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
        /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
        tjit->stack_size -= STACK_FRAME_OVERHEAD;
        tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
 
        /* aghi %r15,-stack_size */
        EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+       /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+       _EMIT6(0xd203f000 | tjit->tccnt_off,
+              0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
        /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
        if (nr_reg_args)
                EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
@@ -2400,6 +2405,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
                                       (nr_stack_args * sizeof(u64) - 1) << 16 |
                                       tjit->stack_args_off,
                               0xf000 | tjit->orig_stack_args_off);
+               /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+               _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
                /* lgr %r1,%r8 */
                EMIT4(0xb9040000, REG_1, REG_8);
                /* %r1() */
@@ -2456,6 +2463,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
        if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
                EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
                              tjit->retval_off);
+       /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+       _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+              0xf000 | tjit->tccnt_off);
        /* aghi %r15,stack_size */
        EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
        /* Emit an expoline for the following indirect jump. */
index 1517921..645cccf 100644 (file)
@@ -531,7 +531,7 @@ static int __init ap325rxa_devices_setup(void)
        device_initialize(&ap325rxa_ceu_device.dev);
        dma_declare_coherent_memory(&ap325rxa_ceu_device.dev,
                        ceu_dma_membase, ceu_dma_membase,
-                       ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
+                       CEU_BUFFER_MEMORY_SIZE);
 
        platform_device_add(&ap325rxa_ceu_device);
 
index 3105136..3be2933 100644 (file)
@@ -1454,15 +1454,13 @@ static int __init arch_setup(void)
        device_initialize(&ecovec_ceu_devices[0]->dev);
        dma_declare_coherent_memory(&ecovec_ceu_devices[0]->dev,
                                    ceu0_dma_membase, ceu0_dma_membase,
-                                   ceu0_dma_membase +
-                                   CEU_BUFFER_MEMORY_SIZE - 1);
+                                   CEU_BUFFER_MEMORY_SIZE);
        platform_device_add(ecovec_ceu_devices[0]);
 
        device_initialize(&ecovec_ceu_devices[1]->dev);
        dma_declare_coherent_memory(&ecovec_ceu_devices[1]->dev,
                                    ceu1_dma_membase, ceu1_dma_membase,
-                                   ceu1_dma_membase +
-                                   CEU_BUFFER_MEMORY_SIZE - 1);
+                                   CEU_BUFFER_MEMORY_SIZE);
        platform_device_add(ecovec_ceu_devices[1]);
 
        gpiod_add_lookup_table(&cn12_power_gpiod_table);
index a18e803..6b775ea 100644 (file)
@@ -603,7 +603,7 @@ static int __init kfr2r09_devices_setup(void)
        device_initialize(&kfr2r09_ceu_device.dev);
        dma_declare_coherent_memory(&kfr2r09_ceu_device.dev,
                        ceu_dma_membase, ceu_dma_membase,
-                       ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
+                       CEU_BUFFER_MEMORY_SIZE);
 
        platform_device_add(&kfr2r09_ceu_device);
 
index f600612..773ee76 100644 (file)
@@ -604,7 +604,7 @@ static int __init migor_devices_setup(void)
        device_initialize(&migor_ceu_device.dev);
        dma_declare_coherent_memory(&migor_ceu_device.dev,
                        ceu_dma_membase, ceu_dma_membase,
-                       ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
+                       CEU_BUFFER_MEMORY_SIZE);
 
        platform_device_add(&migor_ceu_device);
 
index b60a262..6495f93 100644 (file)
@@ -940,15 +940,13 @@ static int __init devices_setup(void)
        device_initialize(&ms7724se_ceu_devices[0]->dev);
        dma_declare_coherent_memory(&ms7724se_ceu_devices[0]->dev,
                                    ceu0_dma_membase, ceu0_dma_membase,
-                                   ceu0_dma_membase +
-                                   CEU_BUFFER_MEMORY_SIZE - 1);
+                                   CEU_BUFFER_MEMORY_SIZE);
        platform_device_add(ms7724se_ceu_devices[0]);
 
        device_initialize(&ms7724se_ceu_devices[1]->dev);
        dma_declare_coherent_memory(&ms7724se_ceu_devices[1]->dev,
                                    ceu1_dma_membase, ceu1_dma_membase,
-                                   ceu1_dma_membase +
-                                   CEU_BUFFER_MEMORY_SIZE - 1);
+                                   CEU_BUFFER_MEMORY_SIZE);
        platform_device_add(ms7724se_ceu_devices[1]);
 
        return platform_add_devices(ms7724se_devices,
index c95f48f..6ecba5f 100644 (file)
@@ -101,8 +101,8 @@ static int switch_drv_remove(struct platform_device *pdev)
                device_remove_file(&pdev->dev, &dev_attr_switch);
 
        platform_set_drvdata(pdev, NULL);
-       flush_work(&psw->work);
        timer_shutdown_sync(&psw->debounce);
+       flush_work(&psw->work);
        free_irq(irq, pdev);
 
        kfree(psw);
index 595ca0b..43b0ae4 100644 (file)
@@ -2,6 +2,5 @@
 generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generic-y += agp.h
-generic-y += export.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
diff --git a/arch/sparc/include/asm/ide.h b/arch/sparc/include/asm/ide.h
deleted file mode 100644 (file)
index 904cc6c..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* ide.h: SPARC PCI specific IDE glue.
- *
- * Copyright (C) 1997  David S. Miller (davem@davemloft.net)
- * Copyright (C) 1998  Eddie C. Dost   (ecd@skynet.be)
- * Adaptation from sparc64 version to sparc by Pete Zaitcev.
- */
-
-#ifndef _SPARC_IDE_H
-#define _SPARC_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm/io.h>
-#ifdef CONFIG_SPARC64
-#include <asm/spitfire.h>
-#include <asm/cacheflush.h>
-#include <asm/page.h>
-#else
-#include <linux/pgtable.h>
-#include <asm/psr.h>
-#endif
-
-#define __ide_insl(data_reg, buffer, wcount) \
-       __ide_insw(data_reg, buffer, (wcount)<<1)
-#define __ide_outsl(data_reg, buffer, wcount) \
-       __ide_outsw(data_reg, buffer, (wcount)<<1)
-
-/* On sparc, I/O ports and MMIO registers are accessed identically.  */
-#define __ide_mm_insw  __ide_insw
-#define __ide_mm_insl  __ide_insl
-#define __ide_mm_outsw __ide_outsw
-#define __ide_mm_outsl __ide_outsl
-
-static inline void __ide_insw(void __iomem *port, void *dst, u32 count)
-{
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-       unsigned long end = (unsigned long)dst + (count << 1);
-#endif
-       u16 *ps = dst;
-       u32 *pi;
-
-       if(((unsigned long)ps) & 0x2) {
-               *ps++ = __raw_readw(port);
-               count--;
-       }
-       pi = (u32 *)ps;
-       while(count >= 2) {
-               u32 w;
-
-               w  = __raw_readw(port) << 16;
-               w |= __raw_readw(port);
-               *pi++ = w;
-               count -= 2;
-       }
-       ps = (u16 *)pi;
-       if(count)
-               *ps++ = __raw_readw(port);
-
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-       __flush_dcache_range((unsigned long)dst, end);
-#endif
-}
-
-static inline void __ide_outsw(void __iomem *port, const void *src, u32 count)
-{
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-       unsigned long end = (unsigned long)src + (count << 1);
-#endif
-       const u16 *ps = src;
-       const u32 *pi;
-
-       if(((unsigned long)src) & 0x2) {
-               __raw_writew(*ps++, port);
-               count--;
-       }
-       pi = (const u32 *)ps;
-       while(count >= 2) {
-               u32 w;
-
-               w = *pi++;
-               __raw_writew((w >> 16), port);
-               __raw_writew(w, port);
-               count -= 2;
-       }
-       ps = (const u16 *)pi;
-       if(count)
-               __raw_writew(*ps, port);
-
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
-       __flush_dcache_range((unsigned long)src, end);
-#endif
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _SPARC_IDE_H */
index a269ad2..a3fdee4 100644 (file)
@@ -8,6 +8,7 @@
  * Copyright (C) 1997 Anton Blanchard (anton@progsoc.uts.edu.au)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <linux/errno.h>
 #include <linux/pgtable.h>
@@ -30,7 +31,6 @@
 #include <asm/unistd.h>
 
 #include <asm/asmmacro.h>
-#include <asm/export.h>
 
 #define curptr      g6
 
index 6044b82..964c61b 100644 (file)
@@ -11,6 +11,7 @@
  * CompactPCI platform by Eric Brower, 1999.
  */
 
+#include <linux/export.h>
 #include <linux/version.h>
 #include <linux/init.h>
 
@@ -25,7 +26,6 @@
 #include <asm/thread_info.h>   /* TI_UWINMASK */
 #include <asm/errno.h>
 #include <asm/pgtable.h>       /* PGDIR_SHIFT */
-#include <asm/export.h>
 
        .data
 /* The following are used with the prom_vector node-ops to figure out
index 72a5bdc..cf05491 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <linux/version.h>
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
@@ -33,7 +34,6 @@
 #include <asm/estate.h>
 #include <asm/sfafsr.h>
 #include <asm/unistd.h>
-#include <asm/export.h>
 
 /* This section from from _start to sparc64_boot_end should fit into
  * 0x0000000000404000 to 0x0000000000408000.
index a6f4ee3..635398e 100644 (file)
@@ -6,10 +6,10 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 #define GLOBAL_SPARE   g7
 #else
 #define GLOBAL_SPARE   g5
index 9c8eb20..31a0c33 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 
 #include <asm/asi.h>
@@ -14,7 +15,6 @@
 #include <asm/ptrace.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
-#include <asm/export.h>
 
        /* On entry: %o5=current FPRS value, %g7 is callers address */
        /* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
index 2d72de8..2a9e7c4 100644 (file)
@@ -6,8 +6,8 @@
  * Copyright (C) 1999 David S. Miller (davem@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(__ashldi3)
index 05dfda9..8fd0b31 100644 (file)
@@ -6,8 +6,8 @@
  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(__ashrdi3)
index 8245d4a..4f8cab2 100644 (file)
@@ -4,10 +4,10 @@
  * Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
-#include <asm/export.h>
 
        .text
 
index 9d647f9..9c91cbb 100644 (file)
@@ -4,10 +4,10 @@
  * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
-#include <asm/export.h>
 
        .text
 
index 76ddd1f..5b92959 100644 (file)
@@ -5,9 +5,9 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/page.h>
-#include <asm/export.h>
 
        /* Zero out 64 bytes of memory at (buf + offset).
         * Assumes %g1 contains zero.
index 87fec4c..2bfa44a 100644 (file)
@@ -5,8 +5,8 @@
  * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 
index 781e39b..84ad709 100644 (file)
@@ -14,8 +14,8 @@
  *     BSD4.4 portable checksum routine
  */
 
+#include <linux/export.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 
 #define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)        \
        ldd     [buf + offset + 0x00], t0;                      \
index 9700ef1..32b626f 100644 (file)
@@ -14,7 +14,7 @@
  *     BSD4.4 portable checksum routine
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
 
 csum_partial_fix_alignment:
index 302d345..e634581 100644 (file)
@@ -5,13 +5,13 @@
  * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/pgtable.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/spitfire.h>
 #include <asm/head.h>
-#include <asm/export.h>
 
        /* What we used to do was lock a TLB entry into a specific
         * TLB slot, clear the page with interrupts disabled, then
index 66e90bf..e23e6a6 100644 (file)
@@ -4,9 +4,9 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 
 #define XCC xcc
 
index 5ebcfd4..7a041f3 100644 (file)
@@ -5,13 +5,13 @@
  * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
  */
 
+#include <linux/export.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <linux/pgtable.h>
 #include <asm/spitfire.h>
 #include <asm/head.h>
-#include <asm/export.h>
 
        /* What we used to do was lock a TLB entry into a specific
         * TLB slot, clear the page with interrupts disabled, then
index 954572c..7bb2ef6 100644 (file)
  * Returns 0 if successful, otherwise count of bytes not copied yet
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
 #include <asm/asmmacro.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
-#include <asm/export.h>
 
 /* Work around cpp -rob */
 #define ALLOC #alloc
index d839956..f968e83 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 #ifdef __KERNEL__
 #define GLOBAL_SPARE   %g7
index a738940..4ba901a 100644 (file)
@@ -5,7 +5,7 @@ This file is part of GNU CC.
 
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .globl __divdi3
index 5a11d86..3a9ad8f 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .register       %g2,#scratch
 
index 06b8d30..ccf97fb 100644 (file)
@@ -5,8 +5,8 @@
  * and onward.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
        .register       %g2, #scratch
index c83e22a..87005b6 100644 (file)
@@ -5,8 +5,8 @@
  * and onward.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
        .register       %g2, #scratch
index 0ddbbb0..eebee59 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
        .align  32
index 531d89c..7fa8fd4 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */
index 9a1289a..47a39f4 100644 (file)
@@ -7,11 +7,11 @@
  * Copyright (C) 1998 Jakub Jelinek   (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
 #include <asm/psr.h>
 #include <asm/smp.h>
 #include <asm/spinlock.h>
-#include <asm/export.h>
 
        .text
        .align  4
index 509ca66..09bf581 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 ENTRY(__lshrdi3)
        cmp     %o2, 0
index deba6fa..f7f7910 100644 (file)
@@ -6,8 +6,8 @@
  * This can also be tweaked for kernel stack overflow detection.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
 /*
  * This is the main variant and is called by C code.  GCC's -pg option
index a18076e..c87e800 100644 (file)
@@ -5,9 +5,9 @@
  * Copyright (C) 2000, 2008 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 
        .text
 ENTRY(memcmp)
index ee823d8..57b1ae0 100644 (file)
@@ -8,7 +8,8 @@
  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
+
 #define FUNC(x)                \
        .globl  x;              \
        .type   x,@function;    \
index 3132b63..543dda7 100644 (file)
@@ -5,8 +5,8 @@
  * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(memmove) /* o0=dst o1=src o2=len */
index c4c2d5b..5386a3a 100644 (file)
@@ -5,7 +5,7 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* In essence, this is just a fancy strlen. */
 
index 36dd638..70a4f21 100644 (file)
@@ -6,7 +6,7 @@
  * Copyright (C) 1998 David S. Miller (davem@redhat.com)
  */
 
-       #include <asm/export.h>
+#include <linux/export.h>
 
 #define HI_MAGIC       0x8080808080808080
 #define LO_MAGIC       0x0101010101010101
index eaff682..a33419d 100644 (file)
@@ -9,8 +9,8 @@
  * clear_user.
  */
 
+#include <linux/export.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 
 /* Work around cpp -rob */
 #define ALLOC #alloc
index 53054de..7e1e8cd 100644 (file)
@@ -5,7 +5,7 @@ This file is part of GNU CC.
 
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
        .text
        .align 4
        .globl __muldi3
index 2f187b2..5bb4c12 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
        .align  4
index dd111bb..27478b3 100644 (file)
@@ -6,9 +6,9 @@
  * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 
 #define LO_MAGIC 0x01010101
 #define HI_MAGIC 0x80808080
index 794733f..387bbf6 100644 (file)
@@ -4,8 +4,8 @@
  *            generic strncmp routine.
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 
        .text
 ENTRY(strncmp)
index 3d37d65..76c1207 100644 (file)
@@ -5,9 +5,9 @@
  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asi.h>
-#include <asm/export.h>
 
        .text
 ENTRY(strncmp)
index f6af7c7..35461e3 100644 (file)
@@ -9,12 +9,12 @@
  * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #include <asm/dcu.h>
 #include <asm/spitfire.h>
-#include <asm/export.h>
 
 /*
  *     Requirements:
index 0d41c94..b44d79d 100644 (file)
@@ -128,6 +128,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
                        goto no_cache_flush;
 
                /* A real file page? */
+               folio = page_folio(page);
                mapping = folio_flush_mapping(folio);
                if (!mapping)
                        goto no_cache_flush;
index 95315d3..5bfe5ca 100644 (file)
@@ -335,9 +335,5 @@ define archhelp
   echo  '                        bzdisk/fdimage*/hdimage/isoimage also accept:'
   echo  '                        FDARGS="..."  arguments for the booted kernel'
   echo  '                        FDINITRD=file initrd for the booted kernel'
-  echo  ''
-  echo  '  kvm_guest.config    - Enable Kconfig items for running this kernel as a KVM guest'
-  echo  '  xen.config          - Enable Kconfig items for running this kernel as a Xen guest'
-  echo  '  x86_debug.config    - Enable tip tree debugging options for testing'
 
 endef
index 2061ed1..58cb949 100644 (file)
 #define X86_FEATURE_SEV_ES             (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
 #define X86_FEATURE_V_TSC_AUX          (19*32+ 9) /* "" Virtual TSC_AUX */
 #define X86_FEATURE_SME_COHERENT       (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP         (19*32+14) /* AMD SEV-ES full debug state swap support */
 
 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP  (20*32+ 0) /* "" No Nested Data Breakpoints */
index 3be6a98..c9f6a6c 100644 (file)
@@ -205,8 +205,6 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
 #endif
 #endif
 
-typedef void crash_vmclear_fn(void);
-extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
 extern void kdump_nmi_shootdown_cpus(void);
 
 #ifdef CONFIG_CRASH_HOTPLUG
index 3bc146d..1a4def3 100644 (file)
@@ -288,13 +288,13 @@ struct kvm_kernel_irq_routing_entry;
  * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
  * also includes TDP pages) to determine whether or not a page can be used in
  * the given MMU context.  This is a subset of the overall kvm_cpu_role to
- * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
- * 2 bytes per gfn instead of 4 bytes per gfn.
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
  *
  * Upper-level shadow pages having gptes are tracked for write-protection via
- * gfn_track.  As above, gfn_track is a 16 bit counter, so KVM must not create
- * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
- * gfn_track will overflow and explosions will ensure.
+ * gfn_write_track.  As above, gfn_write_track is a 16 bit counter, so KVM must
+ * not create more than 2^16-1 upper-level shadow pages at a single gfn,
+ * otherwise gfn_write_track will overflow and explosions will ensue.
  *
  * A unique shadow page (SP) for a gfn is created if and only if an existing SP
  * cannot be reused.  The ability to reuse a SP is tracked by its role, which
@@ -746,7 +746,6 @@ struct kvm_vcpu_arch {
        u64 smi_count;
        bool at_instruction_boundary;
        bool tpr_access_reporting;
-       bool xsaves_enabled;
        bool xfd_no_write_intercept;
        u64 ia32_xss;
        u64 microcode_version;
@@ -831,6 +830,25 @@ struct kvm_vcpu_arch {
        struct kvm_cpuid_entry2 *cpuid_entries;
        struct kvm_hypervisor_cpuid kvm_cpuid;
 
+       /*
+        * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
+        * when "struct kvm_vcpu_arch" is no longer defined in an
+        * arch/x86/include/asm header.  The max is mostly arbitrary, i.e.
+        * can be increased as necessary.
+        */
+#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
+
+       /*
+        * Track whether or not the guest is allowed to use features that are
+        * governed by KVM, where "governed" means KVM needs to manage state
+        * and/or explicitly enable the feature in hardware.  Typically, but
+        * not always, governed features can be used by the guest if and only
+        * if both KVM and userspace want to expose the feature to the guest.
+        */
+       struct {
+               DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
+       } governed_features;
+
        u64 reserved_gpa_bits;
        int maxphyaddr;
 
@@ -1005,7 +1023,7 @@ struct kvm_lpage_info {
 struct kvm_arch_memory_slot {
        struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
        struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
-       unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+       unsigned short *gfn_write_track;
 };
 
 /*
@@ -1247,8 +1265,9 @@ struct kvm_arch {
         * create an NX huge page (without hanging the guest).
         */
        struct list_head possible_nx_huge_pages;
-       struct kvm_page_track_notifier_node mmu_sp_tracker;
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
        struct kvm_page_track_notifier_head track_notifier_head;
+#endif
        /*
         * Protects marking pages unsync during page faults, as TDP MMU page
         * faults only take mmu_lock for read.  For simplicity, the unsync
@@ -1655,8 +1674,8 @@ struct kvm_x86_ops {
 
        u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
        u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
-       void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
-       void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
+       void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
+       void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
 
        /*
         * Retrieve somewhat arbitrary exit information.  Intended to
@@ -1795,8 +1814,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
 #define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        if (kvm_x86_ops.flush_remote_tlbs &&
            !static_call(kvm_x86_flush_remote_tlbs)(kvm))
@@ -1805,6 +1824,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
                return -ENOTSUPP;
 }
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
 #define kvm_arch_pmi_in_guest(vcpu) \
        ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
 
@@ -1833,7 +1854,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot);
-void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
index eb186bc..3d04074 100644 (file)
@@ -2,11 +2,9 @@
 #ifndef _ASM_X86_KVM_PAGE_TRACK_H
 #define _ASM_X86_KVM_PAGE_TRACK_H
 
-enum kvm_page_track_mode {
-       KVM_PAGE_TRACK_WRITE,
-       KVM_PAGE_TRACK_MAX,
-};
+#include <linux/kvm_types.h>
 
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 /*
  * The notifier represented by @kvm_page_track_notifier_node is linked into
  * the head which will be notified when guest is triggering the track event.
@@ -26,54 +24,39 @@ struct kvm_page_track_notifier_node {
         * It is called when guest is writing the write-tracked page
         * and write emulation is finished at that time.
         *
-        * @vcpu: the vcpu where the write access happened.
         * @gpa: the physical address written by guest.
         * @new: the data was written to the address.
         * @bytes: the written length.
         * @node: this node
         */
-       void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-                           int bytes, struct kvm_page_track_notifier_node *node);
+       void (*track_write)(gpa_t gpa, const u8 *new, int bytes,
+                           struct kvm_page_track_notifier_node *node);
+
        /*
-        * It is called when memory slot is being moved or removed
-        * users can drop write-protection for the pages in that memory slot
+        * Invoked when a memory region is removed from the guest.  Or in KVM
+        * terms, when a memslot is deleted.
         *
-        * @kvm: the kvm where memory slot being moved or removed
-        * @slot: the memory slot being moved or removed
-        * @node: this node
+        * @gfn:       base gfn of the region being removed
+        * @nr_pages:  number of pages in the to-be-removed region
+        * @node:      this node
         */
-       void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
-                           struct kvm_page_track_notifier_node *node);
+       void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
+                                   struct kvm_page_track_notifier_node *node);
 };
 
-int kvm_page_track_init(struct kvm *kvm);
-void kvm_page_track_cleanup(struct kvm *kvm);
+int kvm_page_track_register_notifier(struct kvm *kvm,
+                                    struct kvm_page_track_notifier_node *n);
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+                                       struct kvm_page_track_notifier_node *n);
 
-bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
-int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
-
-void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
-int kvm_page_track_create_memslot(struct kvm *kvm,
-                                 struct kvm_memory_slot *slot,
-                                 unsigned long npages);
-
-void kvm_slot_page_track_add_page(struct kvm *kvm,
-                                 struct kvm_memory_slot *slot, gfn_t gfn,
-                                 enum kvm_page_track_mode mode);
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
-                                    struct kvm_memory_slot *slot, gfn_t gfn,
-                                    enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
-                                  const struct kvm_memory_slot *slot,
-                                  gfn_t gfn, enum kvm_page_track_mode mode);
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
+#else
+/*
+ * Allow defining a node in a structure even if page tracking is disabled, e.g.
+ * to play nice with testing headers via direct inclusion from the command line.
+ */
+struct kvm_page_track_notifier_node {};
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
 
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
-                                struct kvm_page_track_notifier_node *n);
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
-                                  struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-                         int bytes);
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
 #endif
index 9177b43..6536873 100644 (file)
@@ -25,7 +25,14 @@ void __noreturn machine_real_restart(unsigned int type);
 #define MRR_BIOS       0
 #define MRR_APM                1
 
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+typedef void (cpu_emergency_virt_cb)(void);
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
 void cpu_emergency_disable_virtualization(void);
+#else
+static inline void cpu_emergency_disable_virtualization(void) {}
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
 
 typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
 void nmi_shootdown_cpus(nmi_shootdown_cb callback);
index e7c7379..19bf955 100644 (file)
@@ -288,6 +288,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
 
 #define AVIC_HPA_MASK  ~((0xFFFULL << 52) | 0xFFF)
 
+#define SVM_SEV_FEAT_DEBUG_SWAP                        BIT(5)
 
 struct vmcb_seg {
        u16 selector;
@@ -345,7 +346,7 @@ struct vmcb_save_area {
        u64 last_excp_from;
        u64 last_excp_to;
        u8 reserved_0x298[72];
-       u32 spec_ctrl;          /* Guest version of SPEC_CTRL at 0x2E0 */
+       u64 spec_ctrl;          /* Guest version of SPEC_CTRL at 0x2E0 */
 } __packed;
 
 /* Save area definition for SEV-ES and SEV-SNP guests */
@@ -512,7 +513,7 @@ struct ghcb {
 } __packed;
 
 
-#define EXPECTED_VMCB_SAVE_AREA_SIZE           740
+#define EXPECTED_VMCB_SAVE_AREA_SIZE           744
 #define EXPECTED_GHCB_SAVE_AREA_SIZE           1032
 #define EXPECTED_SEV_ES_SAVE_AREA_SIZE         1648
 #define EXPECTED_VMCB_CONTROL_AREA_SIZE                1024
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
deleted file mode 100644 (file)
index 3b12e6b..0000000
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* CPU virtualization extensions handling
- *
- * This should carry the code for handling CPU virtualization extensions
- * that needs to live in the kernel core.
- *
- * Author: Eduardo Habkost <ehabkost@redhat.com>
- *
- * Copyright (C) 2008, Red Hat Inc.
- *
- * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
- */
-#ifndef _ASM_X86_VIRTEX_H
-#define _ASM_X86_VIRTEX_H
-
-#include <asm/processor.h>
-
-#include <asm/vmx.h>
-#include <asm/svm.h>
-#include <asm/tlbflush.h>
-
-/*
- * VMX functions:
- */
-
-static inline int cpu_has_vmx(void)
-{
-       unsigned long ecx = cpuid_ecx(1);
-       return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
-}
-
-
-/**
- * cpu_vmxoff() - Disable VMX on the current CPU
- *
- * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
- *
- * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
- * atomically track post-VMXON state, e.g. this may be called in NMI context.
- * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
- * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
- * magically in RM, VM86, compat mode, or at CPL>0.
- */
-static inline int cpu_vmxoff(void)
-{
-       asm_volatile_goto("1: vmxoff\n\t"
-                         _ASM_EXTABLE(1b, %l[fault])
-                         ::: "cc", "memory" : fault);
-
-       cr4_clear_bits(X86_CR4_VMXE);
-       return 0;
-
-fault:
-       cr4_clear_bits(X86_CR4_VMXE);
-       return -EIO;
-}
-
-static inline int cpu_vmx_enabled(void)
-{
-       return __read_cr4() & X86_CR4_VMXE;
-}
-
-/** Disable VMX if it is enabled on the current CPU
- *
- * You shouldn't call this if cpu_has_vmx() returns 0.
- */
-static inline void __cpu_emergency_vmxoff(void)
-{
-       if (cpu_vmx_enabled())
-               cpu_vmxoff();
-}
-
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
-       if (cpu_has_vmx())
-               __cpu_emergency_vmxoff();
-}
-
-
-
-
-/*
- * SVM functions:
- */
-
-/** Check if the CPU has SVM support
- *
- * You can use the 'msg' arg to get a message describing the problem,
- * if the function returns zero. Simply pass NULL if you are not interested
- * on the messages; gcc should take care of not generating code for
- * the messages on this case.
- */
-static inline int cpu_has_svm(const char **msg)
-{
-       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
-           boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) {
-               if (msg)
-                       *msg = "not amd or hygon";
-               return 0;
-       }
-
-       if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
-               if (msg)
-                       *msg = "can't execute cpuid_8000000a";
-               return 0;
-       }
-
-       if (!boot_cpu_has(X86_FEATURE_SVM)) {
-               if (msg)
-                       *msg = "svm not available";
-               return 0;
-       }
-       return 1;
-}
-
-
-/** Disable SVM on the current CPU
- *
- * You should call this only if cpu_has_svm() returned true.
- */
-static inline void cpu_svm_disable(void)
-{
-       uint64_t efer;
-
-       wrmsrl(MSR_VM_HSAVE_PA, 0);
-       rdmsrl(MSR_EFER, efer);
-       if (efer & EFER_SVME) {
-               /*
-                * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
-                * aren't blocked, e.g. if a fatal error occurred between CLGI
-                * and STGI.  Note, STGI may #UD if SVM is disabled from NMI
-                * context between reading EFER and executing STGI.  In that
-                * case, GIF must already be set, otherwise the NMI would have
-                * been blocked, so just eat the fault.
-                */
-               asm_volatile_goto("1: stgi\n\t"
-                                 _ASM_EXTABLE(1b, %l[fault])
-                                 ::: "memory" : fault);
-fault:
-               wrmsrl(MSR_EFER, efer & ~EFER_SVME);
-       }
-}
-
-/** Makes sure SVM is disabled, if it is supported on the CPU
- */
-static inline void cpu_emergency_svm_disable(void)
-{
-       if (cpu_has_svm(NULL))
-               cpu_svm_disable();
-}
-
-#endif /* _ASM_X86_VIRTEX_H */
index 0d02c4a..0e73616 100644 (file)
@@ -71,7 +71,7 @@
 #define SECONDARY_EXEC_RDSEED_EXITING          VMCS_CONTROL_BIT(RDSEED_EXITING)
 #define SECONDARY_EXEC_ENABLE_PML               VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
 #define SECONDARY_EXEC_PT_CONCEAL_VMX          VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
-#define SECONDARY_EXEC_XSAVES                  VMCS_CONTROL_BIT(XSAVES)
+#define SECONDARY_EXEC_ENABLE_XSAVES           VMCS_CONTROL_BIT(XSAVES)
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC     VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
 #define SECONDARY_EXEC_PT_USE_GPA              VMCS_CONTROL_BIT(PT_USE_GPA)
 #define SECONDARY_EXEC_TSC_SCALING              VMCS_CONTROL_BIT(TSC_SCALING)
index 587c774..c92d886 100644 (file)
@@ -48,27 +48,6 @@ struct crash_memmap_data {
        unsigned int type;
 };
 
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
-crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
-EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
-{
-       crash_vmclear_fn *do_vmclear_operation = NULL;
-
-       rcu_read_lock();
-       do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
-       if (do_vmclear_operation)
-               do_vmclear_operation();
-       rcu_read_unlock();
-}
-
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -76,11 +55,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
        crash_save_cpu(regs, cpu);
 
        /*
-        * VMCLEAR VMCSs loaded on all cpus if needed.
-        */
-       cpu_crash_vmclear_loaded_vmcss();
-
-       /*
         * Disable Intel PT to stop its logging
         */
        cpu_emergency_stop_pt();
@@ -133,11 +107,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
        crash_smp_send_stop();
 
-       /*
-        * VMCLEAR VMCSs loaded on this cpu if needed.
-        */
-       cpu_crash_vmclear_loaded_vmcss();
-
        cpu_emergency_disable_virtualization();
 
        /*
index 3adbe97..830425e 100644 (file)
@@ -22,7 +22,6 @@
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
 #include <asm/pci_x86.h>
-#include <asm/virtext.h>
 #include <asm/cpu.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
@@ -530,9 +529,54 @@ static inline void kb_wait(void)
 
 static inline void nmi_shootdown_cpus_on_restart(void);
 
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+/* RCU-protected callback to disable virtualization prior to reboot. */
+static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
+
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
+{
+       if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
+               return;
+
+       rcu_assign_pointer(cpu_emergency_virt_callback, callback);
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback);
+
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
+{
+       if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback))
+               return;
+
+       rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
+       synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
+
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+       cpu_emergency_virt_cb *callback;
+
+       /*
+        * IRQs must be disabled as KVM enables virtualization in hardware via
+        * function call IPIs, i.e. IRQs need to be disabled to guarantee
+        * virtualization stays disabled.
+        */
+       lockdep_assert_irqs_disabled();
+
+       rcu_read_lock();
+       callback = rcu_dereference(cpu_emergency_virt_callback);
+       if (callback)
+               callback();
+       rcu_read_unlock();
+}
+
 static void emergency_reboot_disable_virtualization(void)
 {
-       /* Just make sure we won't change CPUs while doing this */
        local_irq_disable();
 
        /*
@@ -545,7 +589,7 @@ static void emergency_reboot_disable_virtualization(void)
         * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
         * other CPUs may have virtualization enabled.
         */
-       if (cpu_has_vmx() || cpu_has_svm(NULL)) {
+       if (rcu_access_pointer(cpu_emergency_virt_callback)) {
                /* Safely force _this_ CPU out of VMX/SVM operation. */
                cpu_emergency_disable_virtualization();
 
@@ -553,7 +597,9 @@ static void emergency_reboot_disable_virtualization(void)
                nmi_shootdown_cpus_on_restart();
        }
 }
-
+#else
+static void emergency_reboot_disable_virtualization(void) { }
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
 
 void __attribute__((weak)) mach_reboot_fixups(void)
 {
@@ -787,21 +833,9 @@ void machine_crash_shutdown(struct pt_regs *regs)
 }
 #endif
 
-
 /* This is the CPU performing the emergency shutdown work. */
 int crashing_cpu = -1;
 
-/*
- * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
- * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
- * GIF=0, i.e. if the crash occurred between CLGI and STGI.
- */
-void cpu_emergency_disable_virtualization(void)
-{
-       cpu_emergency_vmxoff();
-       cpu_emergency_svm_disable();
-}
-
 #if defined(CONFIG_SMP)
 
 static nmi_shootdown_cb shootdown_callback;
index 89ca7f4..ed90f14 100644 (file)
@@ -101,7 +101,7 @@ config X86_SGX_KVM
 
 config KVM_AMD
        tristate "KVM for AMD processors support"
-       depends on KVM
+       depends on KVM && (CPU_SUP_AMD || CPU_SUP_HYGON)
        help
          Provides support for KVM on AMD processors equipped with the AMD-V
          (SVM) extensions.
@@ -138,6 +138,19 @@ config KVM_XEN
 
          If in doubt, say "N".
 
+config KVM_PROVE_MMU
+       bool "Prove KVM MMU correctness"
+       depends on DEBUG_KERNEL
+       depends on KVM
+       depends on EXPERT
+       help
+         Enables runtime assertions in KVM's MMU that are too costly to enable
+         in anything remotely resembling a production environment, e.g. this
+         gates code that verifies a to-be-freed page table doesn't have any
+         present SPTEs.
+
+         If in doubt, say "N".
+
 config KVM_EXTERNAL_WRITE_TRACKING
        bool
 
index d343268..0544e30 100644 (file)
@@ -11,6 +11,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kvm_host.h>
+#include "linux/lockdep.h"
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
@@ -84,6 +85,18 @@ static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
        struct kvm_cpuid_entry2 *e;
        int i;
 
+       /*
+        * KVM has a semi-arbitrary rule that querying the guest's CPUID model
+        * with IRQs disabled is disallowed.  The CPUID model can legitimately
+        * have over one hundred entries, i.e. the lookup is slow, and IRQs are
+        * typically disabled in KVM only when KVM is in a performance critical
+        * path, e.g. the core VM-Enter/VM-Exit run loop.  Nothing will break
+        * if this rule is violated, this assertion is purely to flag potential
+        * performance issues.  If this fires, consider moving the lookup out
+        * of the hotpath, e.g. by caching information during CPUID updates.
+        */
+       lockdep_assert_irqs_enabled();
+
        for (i = 0; i < nent; i++) {
                e = &entries[i];
 
@@ -312,6 +325,27 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
        struct kvm_cpuid_entry2 *best;
+       bool allow_gbpages;
+
+       BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
+       bitmap_zero(vcpu->arch.governed_features.enabled,
+                   KVM_MAX_NR_GOVERNED_FEATURES);
+
+       /*
+        * If TDP is enabled, let the guest use GBPAGES if they're supported in
+        * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
+        * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
+        * walk for performance and complexity reasons.  Not to mention KVM
+        * _can't_ solve the problem because GVA->GPA walks aren't visible to
+        * KVM once a TDP translation is installed.  Mimic hardware behavior so
+        * that KVM is at least consistent, i.e. doesn't randomly inject #PF.
+        * If TDP is disabled, honor *only* guest CPUID as KVM has full control
+        * and can install smaller shadow pages if the host lacks 1GiB support.
+        */
+       allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
+                                     guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
+       if (allow_gbpages)
+               kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
 
        best = kvm_find_cpuid_entry(vcpu, 1);
        if (best && apic) {
@@ -647,7 +681,8 @@ void kvm_set_cpu_caps(void)
        );
 
        kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
-               F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI)
+               F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
+               F(AMX_COMPLEX)
        );
 
        kvm_cpu_cap_mask(CPUID_D_1_EAX,
@@ -1154,6 +1189,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                cpuid_entry_override(entry, CPUID_8000_0001_EDX);
                cpuid_entry_override(entry, CPUID_8000_0001_ECX);
                break;
+       case 0x80000005:
+               /* Pass host L1 cache and TLB info. */
+               break;
        case 0x80000006:
                /* Drop reserved bits, pass host L2 cache and TLB info. */
                entry->edx &= ~GENMASK(17, 16);
index b1658c0..284fa47 100644 (file)
@@ -232,4 +232,50 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
        return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
 }
 
+enum kvm_governed_features {
+#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
+#include "governed_features.h"
+       KVM_NR_GOVERNED_FEATURES
+};
+
+static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
+{
+       switch (x86_feature) {
+#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
+#include "governed_features.h"
+       default:
+               return -1;
+       }
+}
+
+static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
+{
+       return kvm_governed_feature_index(x86_feature) >= 0;
+}
+
+static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
+                                                    unsigned int x86_feature)
+{
+       BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+
+       __set_bit(kvm_governed_feature_index(x86_feature),
+                 vcpu->arch.governed_features.enabled);
+}
+
+static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
+                                                              unsigned int x86_feature)
+{
+       if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
+               kvm_governed_feature_set(vcpu, x86_feature);
+}
+
+static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
+                                         unsigned int x86_feature)
+{
+       BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+
+       return test_bit(kvm_governed_feature_index(x86_feature),
+                       vcpu->arch.governed_features.enabled);
+}
+
 #endif
index 936a397..2673cd5 100644 (file)
@@ -1799,13 +1799,11 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
                                               op->addr.mem,
                                               &op->val,
                                               op->bytes);
-               break;
        case OP_MEM_STR:
                return segmented_write(ctxt,
                                       op->addr.mem,
                                       op->data,
                                       op->bytes * op->count);
-               break;
        case OP_XMM:
                kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
                break;
diff --git a/arch/x86/kvm/governed_features.h b/arch/x86/kvm/governed_features.h
new file mode 100644 (file)
index 0000000..423a733
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE)
+BUILD_BUG()
+#endif
+
+#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x)
+
+KVM_GOVERNED_X86_FEATURE(GBPAGES)
+KVM_GOVERNED_X86_FEATURE(XSAVES)
+KVM_GOVERNED_X86_FEATURE(VMX)
+KVM_GOVERNED_X86_FEATURE(NRIPS)
+KVM_GOVERNED_X86_FEATURE(TSCRATEMSR)
+KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD)
+KVM_GOVERNED_X86_FEATURE(LBRV)
+KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
+KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
+KVM_GOVERNED_X86_FEATURE(VGIF)
+KVM_GOVERNED_X86_FEATURE(VNMI)
+
+#undef KVM_GOVERNED_X86_FEATURE
+#undef KVM_GOVERNED_FEATURE
index b28fd02..7c2dac6 100644 (file)
@@ -1293,7 +1293,6 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
        case HV_X64_MSR_VP_ASSIST_PAGE:
                return hv_vcpu->cpuid_cache.features_eax &
                        HV_MSR_APIC_ACCESS_AVAILABLE;
-               break;
        case HV_X64_MSR_TSC_FREQUENCY:
        case HV_X64_MSR_APIC_FREQUENCY:
                return hv_vcpu->cpuid_cache.features_eax &
index ab65f3a..be7aeb9 100644 (file)
@@ -213,7 +213,6 @@ struct x86_emulate_ops {
 
        bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
                          u32 *ecx, u32 *edx, bool exact_only);
-       bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
        bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
        bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
        bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
index a983a16..dcd60b3 100644 (file)
@@ -376,7 +376,8 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
        struct kvm_vcpu *vcpu;
        unsigned long i;
        u32 max_id = 255; /* enough space for any xAPIC ID */
-       bool xapic_id_mismatch = false;
+       bool xapic_id_mismatch;
+       int r;
 
        /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
        if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
@@ -386,9 +387,14 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
                  "Dirty APIC map without an in-kernel local APIC");
 
        mutex_lock(&kvm->arch.apic_map_lock);
+
+retry:
        /*
-        * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
-        * (if clean) or the APIC registers (if dirty).
+        * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
+        * or the APIC registers (if dirty).  Note, on retry the map may not
+        * yet have been marked dirty by whatever task changed a vCPU's x2APIC
+        * ID, i.e. the map may still show up as in-progress.  In that case
+        * this task still needs to retry and complete its calculation.
         */
        if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
                                   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
@@ -397,6 +403,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
                return;
        }
 
+       /*
+        * Reset the mismatch flag between attempts so that KVM does the right
+        * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
+        * never let max_id decrease.  Preventing max_id from shrinking ensures
+        * KVM won't get stuck in an infinite loop, e.g. if the vCPU with the
+        * highest x2APIC ID is toggling its APIC on and off.
+        */
+       xapic_id_mismatch = false;
+
        kvm_for_each_vcpu(i, vcpu, kvm)
                if (kvm_apic_present(vcpu))
                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
@@ -415,9 +430,15 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
                if (!kvm_apic_present(vcpu))
                        continue;
 
-               if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
+               r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
+               if (r) {
                        kvfree(new);
                        new = NULL;
+                       if (r == -E2BIG) {
+                               cond_resched();
+                               goto retry;
+                       }
+
                        goto out;
                }
 
index 92d5a19..253fb20 100644 (file)
@@ -121,6 +121,8 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+                        int bytes);
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
index ec169f5..e1d011c 100644 (file)
@@ -25,6 +25,7 @@
 #include "kvm_cache_regs.h"
 #include "smm.h"
 #include "kvm_emulate.h"
+#include "page_track.h"
 #include "cpuid.h"
 #include "spte.h"
 
@@ -53,7 +54,7 @@
 #include <asm/io.h>
 #include <asm/set_memory.h>
 #include <asm/vmx.h>
-#include <asm/kvm_page_track.h>
+
 #include "trace.h"
 
 extern bool itlb_multihit_kvm_mitigation;
@@ -115,11 +116,6 @@ static int max_huge_page_level __read_mostly;
 static int tdp_root_level __read_mostly;
 static int max_tdp_level __read_mostly;
 
-#ifdef MMU_DEBUG
-bool dbg = 0;
-module_param(dbg, bool, 0644);
-#endif
-
 #define PTE_PREFETCH_NUM               8
 
 #include <trace/events/kvm.h>
@@ -278,16 +274,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void)
        return kvm_x86_ops.flush_remote_tlbs_range;
 }
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-                                gfn_t nr_pages)
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
 {
-       int ret = -EOPNOTSUPP;
+       if (!kvm_x86_ops.flush_remote_tlbs_range)
+               return -EOPNOTSUPP;
 
-       if (kvm_x86_ops.flush_remote_tlbs_range)
-               ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
-                                                                  nr_pages);
-       if (ret)
-               kvm_flush_remote_tlbs(kvm);
+       return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
 }
 
 static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@@ -490,7 +482,7 @@ retry:
  */
 static void mmu_spte_set(u64 *sptep, u64 new_spte)
 {
-       WARN_ON(is_shadow_present_pte(*sptep));
+       WARN_ON_ONCE(is_shadow_present_pte(*sptep));
        __set_spte(sptep, new_spte);
 }
 
@@ -502,7 +494,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
 {
        u64 old_spte = *sptep;
 
-       WARN_ON(!is_shadow_present_pte(new_spte));
+       WARN_ON_ONCE(!is_shadow_present_pte(new_spte));
        check_spte_writable_invariants(new_spte);
 
        if (!is_shadow_present_pte(old_spte)) {
@@ -515,7 +507,7 @@ static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte)
        else
                old_spte = __update_clear_spte_slow(sptep, new_spte);
 
-       WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
+       WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
 
        return old_spte;
 }
@@ -597,7 +589,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
         * by a refcounted page, the refcount is elevated.
         */
        page = kvm_pfn_to_refcounted_page(pfn);
-       WARN_ON(page && !page_count(page));
+       WARN_ON_ONCE(page && !page_count(page));
 
        if (is_accessed_spte(old_spte))
                kvm_set_pfn_accessed(pfn);
@@ -812,7 +804,7 @@ static void update_gfn_disallow_lpage_count(const struct kvm_memory_slot *slot,
        for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
                linfo = lpage_info_slot(gfn, slot, i);
                linfo->disallow_lpage += count;
-               WARN_ON(linfo->disallow_lpage < 0);
+               WARN_ON_ONCE(linfo->disallow_lpage < 0);
        }
 }
 
@@ -839,8 +831,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 
        /* the non-leaf shadow pages are keeping readonly. */
        if (sp->role.level > PG_LEVEL_4K)
-               return kvm_slot_page_track_add_page(kvm, slot, gfn,
-                                                   KVM_PAGE_TRACK_WRITE);
+               return __kvm_write_track_add_gfn(kvm, slot, gfn);
 
        kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
@@ -886,8 +877,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
        slots = kvm_memslots_for_spte_role(kvm, sp->role);
        slot = __gfn_to_memslot(slots, gfn);
        if (sp->role.level > PG_LEVEL_4K)
-               return kvm_slot_page_track_remove_page(kvm, slot, gfn,
-                                                      KVM_PAGE_TRACK_WRITE);
+               return __kvm_write_track_remove_gfn(kvm, slot, gfn);
 
        kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
@@ -941,10 +931,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
        int count = 0;
 
        if (!rmap_head->val) {
-               rmap_printk("%p %llx 0->1\n", spte, *spte);
                rmap_head->val = (unsigned long)spte;
        } else if (!(rmap_head->val & 1)) {
-               rmap_printk("%p %llx 1->many\n", spte, *spte);
                desc = kvm_mmu_memory_cache_alloc(cache);
                desc->sptes[0] = (u64 *)rmap_head->val;
                desc->sptes[1] = spte;
@@ -953,7 +941,6 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
                rmap_head->val = (unsigned long)desc | 1;
                ++count;
        } else {
-               rmap_printk("%p %llx many->many\n", spte, *spte);
                desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
                count = desc->tail_count + desc->spte_count;
 
@@ -973,7 +960,8 @@ static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
        return count;
 }
 
-static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+static void pte_list_desc_remove_entry(struct kvm *kvm,
+                                      struct kvm_rmap_head *rmap_head,
                                       struct pte_list_desc *desc, int i)
 {
        struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
@@ -984,7 +972,7 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
         * when adding an entry and the previous head is full, and heads are
         * removed (this flow) when they become empty.
         */
-       BUG_ON(j < 0);
+       KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm);
 
        /*
         * Replace the to-be-freed SPTE with the last valid entry from the head
@@ -1009,35 +997,34 @@ static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
        mmu_free_pte_list_desc(head_desc);
 }
 
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+static void pte_list_remove(struct kvm *kvm, u64 *spte,
+                           struct kvm_rmap_head *rmap_head)
 {
        struct pte_list_desc *desc;
        int i;
 
-       if (!rmap_head->val) {
-               pr_err("%s: %p 0->BUG\n", __func__, spte);
-               BUG();
-       } else if (!(rmap_head->val & 1)) {
-               rmap_printk("%p 1->0\n", spte);
-               if ((u64 *)rmap_head->val != spte) {
-                       pr_err("%s:  %p 1->BUG\n", __func__, spte);
-                       BUG();
-               }
+       if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
+               return;
+
+       if (!(rmap_head->val & 1)) {
+               if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
+                       return;
+
                rmap_head->val = 0;
        } else {
-               rmap_printk("%p many->many\n", spte);
                desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
                while (desc) {
                        for (i = 0; i < desc->spte_count; ++i) {
                                if (desc->sptes[i] == spte) {
-                                       pte_list_desc_remove_entry(rmap_head, desc, i);
+                                       pte_list_desc_remove_entry(kvm, rmap_head,
+                                                                  desc, i);
                                        return;
                                }
                        }
                        desc = desc->more;
                }
-               pr_err("%s: %p many->many\n", __func__, spte);
-               BUG();
+
+               KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
        }
 }
 
@@ -1045,7 +1032,7 @@ static void kvm_zap_one_rmap_spte(struct kvm *kvm,
                                  struct kvm_rmap_head *rmap_head, u64 *sptep)
 {
        mmu_spte_clear_track_bits(kvm, sptep);
-       pte_list_remove(sptep, rmap_head);
+       pte_list_remove(kvm, sptep, rmap_head);
 }
 
 /* Return true if at least one SPTE was zapped, false otherwise */
@@ -1120,7 +1107,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
        slot = __gfn_to_memslot(slots, gfn);
        rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
 
-       pte_list_remove(spte, rmap_head);
+       pte_list_remove(kvm, spte, rmap_head);
 }
 
 /*
@@ -1212,7 +1199,7 @@ static void drop_large_spte(struct kvm *kvm, u64 *sptep, bool flush)
        struct kvm_mmu_page *sp;
 
        sp = sptep_to_sp(sptep);
-       WARN_ON(sp->role.level == PG_LEVEL_4K);
+       WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K);
 
        drop_spte(kvm, sptep);
 
@@ -1241,8 +1228,6 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect)
            !(pt_protect && is_mmu_writable_spte(spte)))
                return false;
 
-       rmap_printk("spte %p %llx\n", sptep, *sptep);
-
        if (pt_protect)
                spte &= ~shadow_mmu_writable_mask;
        spte = spte & ~PT_WRITABLE_MASK;
@@ -1267,9 +1252,7 @@ static bool spte_clear_dirty(u64 *sptep)
 {
        u64 spte = *sptep;
 
-       rmap_printk("spte %p %llx\n", sptep, *sptep);
-
-       MMU_WARN_ON(!spte_ad_enabled(spte));
+       KVM_MMU_WARN_ON(!spte_ad_enabled(spte));
        spte &= ~shadow_dirty_mask;
        return mmu_spte_update(sptep, spte);
 }
@@ -1475,14 +1458,11 @@ static bool kvm_set_pte_rmap(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
        u64 new_spte;
        kvm_pfn_t new_pfn;
 
-       WARN_ON(pte_huge(pte));
+       WARN_ON_ONCE(pte_huge(pte));
        new_pfn = pte_pfn(pte);
 
 restart:
        for_each_rmap_spte(rmap_head, &iter, sptep) {
-               rmap_printk("spte %p %llx gfn %llx (%d)\n",
-                           sptep, *sptep, gfn, level);
-
                need_flush = true;
 
                if (pte_write(pte)) {
@@ -1588,7 +1568,7 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
        for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
                                 range->start, range->end - 1, &iterator)
                ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
-                              iterator.level, range->pte);
+                              iterator.level, range->arg.pte);
 
        return ret;
 }
@@ -1710,21 +1690,19 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
        return young;
 }
 
-#ifdef MMU_DEBUG
-static int is_empty_shadow_page(u64 *spt)
+static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
 {
-       u64 *pos;
-       u64 *end;
+#ifdef CONFIG_KVM_PROVE_MMU
+       int i;
 
-       for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++)
-               if (is_shadow_present_pte(*pos)) {
-                       printk(KERN_ERR "%s: %p %llx\n", __func__,
-                              pos, *pos);
-                       return 0;
-               }
-       return 1;
-}
+       for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
+               if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i])))
+                       pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free",
+                                          sp->spt[i], &sp->spt[i],
+                                          kvm_mmu_page_get_gfn(sp, i));
+       }
 #endif
+}
 
 /*
  * This value is the sum of all of the kvm instances's
@@ -1752,7 +1730,8 @@ static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
 {
-       MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
+       kvm_mmu_check_sptes_at_free(sp);
+
        hlist_del(&sp->hash_link);
        list_del(&sp->link);
        free_page((unsigned long)sp->spt);
@@ -1775,16 +1754,16 @@ static void mmu_page_add_parent_pte(struct kvm_mmu_memory_cache *cache,
        pte_list_add(cache, parent_pte, &sp->parent_ptes);
 }
 
-static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
+static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
                                       u64 *parent_pte)
 {
-       pte_list_remove(parent_pte, &sp->parent_ptes);
+       pte_list_remove(kvm, parent_pte, &sp->parent_ptes);
 }
 
-static void drop_parent_pte(struct kvm_mmu_page *sp,
+static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
                            u64 *parent_pte)
 {
-       mmu_page_remove_parent_pte(sp, parent_pte);
+       mmu_page_remove_parent_pte(kvm, sp, parent_pte);
        mmu_spte_clear_no_track(parent_pte);
 }
 
@@ -1840,7 +1819,7 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
 static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
 {
        --sp->unsync_children;
-       WARN_ON((int)sp->unsync_children < 0);
+       WARN_ON_ONCE((int)sp->unsync_children < 0);
        __clear_bit(idx, sp->unsync_child_bitmap);
 }
 
@@ -1898,7 +1877,7 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 
 static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-       WARN_ON(!sp->unsync);
+       WARN_ON_ONCE(!sp->unsync);
        trace_kvm_mmu_sync_page(sp);
        sp->unsync = 0;
        --kvm->stat.mmu_unsync;
@@ -2073,11 +2052,11 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec,
        if (pvec->nr == 0)
                return 0;
 
-       WARN_ON(pvec->page[0].idx != INVALID_INDEX);
+       WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX);
 
        sp = pvec->page[0].sp;
        level = sp->role.level;
-       WARN_ON(level == PG_LEVEL_4K);
+       WARN_ON_ONCE(level == PG_LEVEL_4K);
 
        parents->parent[level-2] = sp;
 
@@ -2099,7 +2078,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
                if (!sp)
                        return;
 
-               WARN_ON(idx == INVALID_INDEX);
+               WARN_ON_ONCE(idx == INVALID_INDEX);
                clear_unsync_child_bit(sp, idx);
                level++;
        } while (!sp->unsync_children);
@@ -2220,7 +2199,7 @@ static struct kvm_mmu_page *kvm_mmu_find_shadow_page(struct kvm *kvm,
                        if (ret < 0)
                                break;
 
-                       WARN_ON(!list_empty(&invalid_list));
+                       WARN_ON_ONCE(!list_empty(&invalid_list));
                        if (ret > 0)
                                kvm_flush_remote_tlbs(kvm);
                }
@@ -2499,7 +2478,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                if (child->role.access == direct_access)
                        return;
 
-               drop_parent_pte(child, sptep);
+               drop_parent_pte(vcpu->kvm, child, sptep);
                kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
        }
 }
@@ -2517,7 +2496,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
                        drop_spte(kvm, spte);
                } else {
                        child = spte_to_child_sp(pte);
-                       drop_parent_pte(child, spte);
+                       drop_parent_pte(kvm, child, spte);
 
                        /*
                         * Recursively zap nested TDP SPs, parentless SPs are
@@ -2548,13 +2527,13 @@ static int kvm_mmu_page_unlink_children(struct kvm *kvm,
        return zapped;
 }
 
-static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
+static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
        u64 *sptep;
        struct rmap_iterator iter;
 
        while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
-               drop_parent_pte(sp, sptep);
+               drop_parent_pte(kvm, sp, sptep);
 }
 
 static int mmu_zap_unsync_children(struct kvm *kvm,
@@ -2593,7 +2572,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
        ++kvm->stat.mmu_shadow_zapped;
        *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
        *nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
-       kvm_mmu_unlink_parents(sp);
+       kvm_mmu_unlink_parents(kvm, sp);
 
        /* Zapping children means active_mmu_pages has become unstable. */
        list_unstable = *nr_zapped;
@@ -2675,7 +2654,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
        kvm_flush_remote_tlbs(kvm);
 
        list_for_each_entry_safe(sp, nsp, invalid_list, link) {
-               WARN_ON(!sp->role.invalid || sp->root_count);
+               WARN_ON_ONCE(!sp->role.invalid || sp->root_count);
                kvm_mmu_free_shadow_page(sp);
        }
 }
@@ -2775,12 +2754,9 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
        LIST_HEAD(invalid_list);
        int r;
 
-       pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
        r = 0;
        write_lock(&kvm->mmu_lock);
        for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
-               pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
-                        sp->role.word);
                r = 1;
                kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
        }
@@ -2831,7 +2807,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
         * track machinery is used to write-protect upper-level shadow pages,
         * i.e. this guards the role.level == 4K assertion below!
         */
-       if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
+       if (kvm_gfn_is_write_tracked(kvm, slot, gfn))
                return -EPERM;
 
        /*
@@ -2873,7 +2849,7 @@ int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
                                continue;
                }
 
-               WARN_ON(sp->role.level != PG_LEVEL_4K);
+               WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
                kvm_unsync_page(kvm, sp);
        }
        if (locked)
@@ -2938,9 +2914,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
        bool prefetch = !fault || fault->prefetch;
        bool write_fault = fault && fault->write;
 
-       pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
-                *sptep, write_fault, gfn);
-
        if (unlikely(is_noslot_pfn(pfn))) {
                vcpu->stat.pf_mmio_spte_created++;
                mark_mmio_spte(vcpu, sptep, gfn, pte_access);
@@ -2957,11 +2930,9 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
                        u64 pte = *sptep;
 
                        child = spte_to_child_sp(pte);
-                       drop_parent_pte(child, sptep);
+                       drop_parent_pte(vcpu->kvm, child, sptep);
                        flush = true;
                } else if (pfn != spte_to_pfn(*sptep)) {
-                       pgprintk("hfn old %llx new %llx\n",
-                                spte_to_pfn(*sptep), pfn);
                        drop_spte(vcpu->kvm, sptep);
                        flush = true;
                } else
@@ -2986,8 +2957,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
        if (flush)
                kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
 
-       pgprintk("%s: setting spte %llx\n", __func__, *sptep);
-
        if (!was_rmapped) {
                WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
                rmap_add(vcpu, slot, sptep, gfn, pte_access);
@@ -3033,7 +3002,7 @@ static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
        u64 *spte, *start = NULL;
        int i;
 
-       WARN_ON(!sp->role.direct);
+       WARN_ON_ONCE(!sp->role.direct);
 
        i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1);
        spte = sp->spt + i;
@@ -3574,12 +3543,8 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
        if (!VALID_PAGE(*root_hpa))
                return;
 
-       /*
-        * The "root" may be a special root, e.g. a PAE entry, treat it as a
-        * SPTE to ensure any non-PA bits are dropped.
-        */
-       sp = spte_to_child_sp(*root_hpa);
-       if (WARN_ON(!sp))
+       sp = root_to_sp(*root_hpa);
+       if (WARN_ON_ONCE(!sp))
                return;
 
        if (is_tdp_mmu_page(sp))
@@ -3624,7 +3589,9 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                                           &invalid_list);
 
        if (free_active_root) {
-               if (to_shadow_page(mmu->root.hpa)) {
+               if (kvm_mmu_is_dummy_root(mmu->root.hpa)) {
+                       /* Nothing to cleanup for dummy roots. */
+               } else if (root_to_sp(mmu->root.hpa)) {
                        mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list);
                } else if (mmu->pae_root) {
                        for (i = 0; i < 4; ++i) {
@@ -3648,6 +3615,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
 void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 {
        unsigned long roots_to_free = 0;
+       struct kvm_mmu_page *sp;
        hpa_t root_hpa;
        int i;
 
@@ -3662,8 +3630,8 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
                if (!VALID_PAGE(root_hpa))
                        continue;
 
-               if (!to_shadow_page(root_hpa) ||
-                       to_shadow_page(root_hpa)->role.guest_mode)
+               sp = root_to_sp(root_hpa);
+               if (!sp || sp->role.guest_mode)
                        roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
        }
 
@@ -3671,19 +3639,6 @@ void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
 
-
-static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
-{
-       int ret = 0;
-
-       if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
-               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-               ret = 1;
-       }
-
-       return ret;
-}
-
 static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
                            u8 level)
 {
@@ -3821,8 +3776,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
        root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
        root_gfn = root_pgd >> PAGE_SHIFT;
 
-       if (mmu_check_root(vcpu, root_gfn))
-               return 1;
+       if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
+               mmu->root.hpa = kvm_mmu_get_dummy_root();
+               return 0;
+       }
 
        /*
         * On SVM, reading PDPTRs might access guest memory, which might fault
@@ -3834,8 +3791,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                        if (!(pdptrs[i] & PT_PRESENT_MASK))
                                continue;
 
-                       if (mmu_check_root(vcpu, pdptrs[i] >> PAGE_SHIFT))
-                               return 1;
+                       if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT))
+                               pdptrs[i] = 0;
                }
        }
 
@@ -4002,7 +3959,7 @@ static bool is_unsync_root(hpa_t root)
 {
        struct kvm_mmu_page *sp;
 
-       if (!VALID_PAGE(root))
+       if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root))
                return false;
 
        /*
@@ -4018,7 +3975,7 @@ static bool is_unsync_root(hpa_t root)
         * requirement isn't satisfied.
         */
        smp_rmb();
-       sp = to_shadow_page(root);
+       sp = root_to_sp(root);
 
        /*
         * PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the
@@ -4048,11 +4005,12 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 
        if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) {
                hpa_t root = vcpu->arch.mmu->root.hpa;
-               sp = to_shadow_page(root);
 
                if (!is_unsync_root(root))
                        return;
 
+               sp = root_to_sp(root);
+
                write_lock(&vcpu->kvm->mmu_lock);
                mmu_sync_children(vcpu, sp, true);
                write_unlock(&vcpu->kvm->mmu_lock);
@@ -4194,7 +4152,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
                return RET_PF_EMULATE;
 
        reserved = get_mmio_spte(vcpu, addr, &spte);
-       if (WARN_ON(reserved))
+       if (WARN_ON_ONCE(reserved))
                return -EINVAL;
 
        if (is_mmio_spte(spte)) {
@@ -4232,7 +4190,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
         * guest is writing the page which is write tracked which can
         * not be fixed by page fault handler.
         */
-       if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
+       if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn))
                return true;
 
        return false;
@@ -4382,7 +4340,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
                                struct kvm_page_fault *fault)
 {
-       struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
+       struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
 
        /* Special roots, e.g. pae_root, are not backed by shadow pages. */
        if (sp && is_obsolete_sp(vcpu->kvm, sp))
@@ -4407,6 +4365,10 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 {
        int r;
 
+       /* Dummy roots are used only for shadowing bad guest roots. */
+       if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa)))
+               return RET_PF_RETRY;
+
        if (page_fault_handle_page_track(vcpu, fault))
                return RET_PF_EMULATE;
 
@@ -4443,8 +4405,6 @@ out_unlock:
 static int nonpaging_page_fault(struct kvm_vcpu *vcpu,
                                struct kvm_page_fault *fault)
 {
-       pgprintk("%s: gva %lx error %x\n", __func__, fault->addr, fault->error_code);
-
        /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
        fault->max_level = PG_LEVEL_2M;
        return direct_page_fault(vcpu, fault);
@@ -4562,9 +4522,19 @@ static void nonpaging_init_context(struct kvm_mmu *context)
 static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
                                  union kvm_mmu_page_role role)
 {
-       return (role.direct || pgd == root->pgd) &&
-              VALID_PAGE(root->hpa) &&
-              role.word == to_shadow_page(root->hpa)->role.word;
+       struct kvm_mmu_page *sp;
+
+       if (!VALID_PAGE(root->hpa))
+               return false;
+
+       if (!role.direct && pgd != root->pgd)
+               return false;
+
+       sp = root_to_sp(root->hpa);
+       if (WARN_ON_ONCE(!sp))
+               return false;
+
+       return role.word == sp->role.word;
 }
 
 /*
@@ -4634,11 +4604,10 @@ static bool fast_pgd_switch(struct kvm *kvm, struct kvm_mmu *mmu,
                            gpa_t new_pgd, union kvm_mmu_page_role new_role)
 {
        /*
-        * For now, limit the caching to 64-bit hosts+VMs in order to avoid
-        * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
-        * later if necessary.
+        * Limit reuse to 64-bit hosts+VMs without "special" roots in order to
+        * avoid having to deal with PDPTEs and other complexities.
         */
-       if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa))
+       if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa))
                kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
 
        if (VALID_PAGE(mmu->root.hpa))
@@ -4684,9 +4653,12 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
         * If this is a direct root page, it doesn't have a write flooding
         * count. Otherwise, clear the write flooding count.
         */
-       if (!new_role.direct)
-               __clear_sp_write_flooding_count(
-                               to_shadow_page(vcpu->arch.mmu->root.hpa));
+       if (!new_role.direct) {
+               struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
+
+               if (!WARN_ON_ONCE(!sp))
+                       __clear_sp_write_flooding_count(sp);
+       }
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
 
@@ -4808,28 +4780,13 @@ static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
        }
 }
 
-static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu)
-{
-       /*
-        * If TDP is enabled, let the guest use GBPAGES if they're supported in
-        * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
-        * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
-        * walk for performance and complexity reasons.  Not to mention KVM
-        * _can't_ solve the problem because GVA->GPA walks aren't visible to
-        * KVM once a TDP translation is installed.  Mimic hardware behavior so
-        * that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
-        */
-       return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
-                            guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
-}
-
 static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
                                        struct kvm_mmu *context)
 {
        __reset_rsvds_bits_mask(&context->guest_rsvd_check,
                                vcpu->arch.reserved_gpa_bits,
                                context->cpu_role.base.level, is_efer_nx(context),
-                               guest_can_use_gbpages(vcpu),
+                               guest_can_use(vcpu, X86_FEATURE_GBPAGES),
                                is_cr4_pse(context),
                                guest_cpuid_is_amd_or_hygon(vcpu));
 }
@@ -4906,7 +4863,8 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
        __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
                                context->root_role.level,
                                context->root_role.efer_nx,
-                               guest_can_use_gbpages(vcpu), is_pse, is_amd);
+                               guest_can_use(vcpu, X86_FEATURE_GBPAGES),
+                               is_pse, is_amd);
 
        if (!shadow_me_mask)
                return;
@@ -5467,8 +5425,8 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
         * physical address properties) in a single VM would require tracking
         * all relevant CPUID information in kvm_mmu_page_role. That is very
         * undesirable as it would increase the memory requirements for
-        * gfn_track (see struct kvm_mmu_page_role comments).  For now that
-        * problem is swept under the rug; KVM's CPUID API is horrific and
+        * gfn_write_track (see struct kvm_mmu_page_role comments).  For now
+        * that problem is swept under the rug; KVM's CPUID API is horrific and
         * it's all but impossible to solve it without introducing a new API.
         */
        vcpu->arch.root_mmu.root_role.word = 0;
@@ -5531,9 +5489,9 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
        struct kvm *kvm = vcpu->kvm;
 
        kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
-       WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
+       WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
        kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
-       WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
+       WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
        vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 }
 
@@ -5546,16 +5504,21 @@ static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa)
 
        /*
         * When freeing obsolete roots, treat roots as obsolete if they don't
-        * have an associated shadow page.  This does mean KVM will get false
+        * have an associated shadow page, as it's impossible to determine if
+        * such roots are fresh or stale.  This does mean KVM will get false
         * positives and free roots that don't strictly need to be freed, but
         * such false positives are relatively rare:
         *
-        *  (a) only PAE paging and nested NPT has roots without shadow pages
+        *  (a) only PAE paging and nested NPT have roots without shadow pages
+        *      (or any shadow paging flavor with a dummy root, see note below)
         *  (b) remote reloads due to a memslot update obsoletes _all_ roots
         *  (c) KVM doesn't track previous roots for PAE paging, and the guest
         *      is unlikely to zap an in-use PGD.
+        *
+        * Note!  Dummy roots are unique in that they are obsoleted by memslot
+        * _creation_!  See also FNAME(fetch).
         */
-       sp = to_shadow_page(root_hpa);
+       sp = root_to_sp(root_hpa);
        return !sp || is_obsolete_sp(kvm, sp);
 }
 
@@ -5634,9 +5597,6 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
 {
        unsigned offset, pte_size, misaligned;
 
-       pgprintk("misaligned: gpa %llx bytes %d role %x\n",
-                gpa, bytes, sp->role.word);
-
        offset = offset_in_page(gpa);
        pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
 
@@ -5684,9 +5644,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
        return spte;
 }
 
-static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-                             const u8 *new, int bytes,
-                             struct kvm_page_track_notifier_node *node)
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+                        int bytes)
 {
        gfn_t gfn = gpa >> PAGE_SHIFT;
        struct kvm_mmu_page *sp;
@@ -5702,8 +5661,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
                return;
 
-       pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
-
        write_lock(&vcpu->kvm->mmu_lock);
 
        gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
@@ -5742,7 +5699,18 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
        int r, emulation_type = EMULTYPE_PF;
        bool direct = vcpu->arch.mmu->root_role.direct;
 
-       if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+       /*
+        * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
+        * checks when emulating instructions that trigger implicit access.
+        * WARN if hardware generates a fault with an error code that collides
+        * with the KVM-defined value.  Clear the flag and continue on, i.e.
+        * don't terminate the VM, as KVM can't possibly be relying on a flag
+        * that KVM doesn't know about.
+        */
+       if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
+               error_code &= ~PFERR_IMPLICIT_ACCESS;
+
+       if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
                return RET_PF_RETRY;
 
        r = RET_PF_INVALID;
@@ -6099,7 +6067,7 @@ restart:
                 * pages.  Skip the bogus page, otherwise we'll get stuck in an
                 * infinite loop if the page gets put back on the list (again).
                 */
-               if (WARN_ON(sp->role.invalid))
+               if (WARN_ON_ONCE(sp->role.invalid))
                        continue;
 
                /*
@@ -6199,16 +6167,8 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
        return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
 }
 
-static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
-                       struct kvm_memory_slot *slot,
-                       struct kvm_page_track_notifier_node *node)
-{
-       kvm_mmu_zap_all_fast(kvm);
-}
-
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
-       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
        int r;
 
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
@@ -6222,10 +6182,6 @@ int kvm_mmu_init_vm(struct kvm *kvm)
                        return r;
        }
 
-       node->track_write = kvm_mmu_pte_write;
-       node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
-       kvm_page_track_register_notifier(kvm, node);
-
        kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
        kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
 
@@ -6246,10 +6202,6 @@ static void mmu_free_vm_memory_caches(struct kvm *kvm)
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
 {
-       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-
-       kvm_page_track_unregister_notifier(kvm, node);
-
        if (tdp_mmu_enabled)
                kvm_mmu_uninit_tdp_mmu(kvm);
 
@@ -6670,7 +6622,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
         */
        if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
                            PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
-               kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+               kvm_flush_remote_tlbs_memslot(kvm, slot);
 }
 
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
@@ -6689,20 +6641,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
        }
 }
 
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot)
-{
-       /*
-        * All current use cases for flushing the TLBs for a specific memslot
-        * related to dirty logging, and many do the TLB flush out of mmu_lock.
-        * The interaction between the various operations on memslot must be
-        * serialized by slots_locks to ensure the TLB flush from one operation
-        * is observed by any other operation on the same memslot.
-        */
-       lockdep_assert_held(&kvm->slots_lock);
-       kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
-}
-
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot)
 {
@@ -6732,7 +6670,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
         */
 }
 
-void kvm_mmu_zap_all(struct kvm *kvm)
+static void kvm_mmu_zap_all(struct kvm *kvm)
 {
        struct kvm_mmu_page *sp, *node;
        LIST_HEAD(invalid_list);
@@ -6741,7 +6679,7 @@ void kvm_mmu_zap_all(struct kvm *kvm)
        write_lock(&kvm->mmu_lock);
 restart:
        list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
-               if (WARN_ON(sp->role.invalid))
+               if (WARN_ON_ONCE(sp->role.invalid))
                        continue;
                if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
                        goto restart;
@@ -6757,9 +6695,20 @@ restart:
        write_unlock(&kvm->mmu_lock);
 }
 
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+       kvm_mmu_zap_all(kvm);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+                                  struct kvm_memory_slot *slot)
+{
+       kvm_mmu_zap_all_fast(kvm);
+}
+
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
-       WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+       WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
 
        gen &= MMIO_SPTE_GEN_MASK;
 
@@ -6862,7 +6811,7 @@ static void mmu_destroy_caches(void)
 static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
 {
        if (nx_hugepage_mitigation_hard_disabled)
-               return sprintf(buffer, "never\n");
+               return sysfs_emit(buffer, "never\n");
 
        return param_get_bool(buffer, kp);
 }
index d39af56..b102014 100644 (file)
@@ -6,18 +6,10 @@
 #include <linux/kvm_host.h>
 #include <asm/kvm_host.h>
 
-#undef MMU_DEBUG
-
-#ifdef MMU_DEBUG
-extern bool dbg;
-
-#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
-#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
-#define MMU_WARN_ON(x) WARN_ON(x)
+#ifdef CONFIG_KVM_PROVE_MMU
+#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
 #else
-#define pgprintk(x...) do { } while (0)
-#define rmap_printk(x...) do { } while (0)
-#define MMU_WARN_ON(x) do { } while (0)
+#define KVM_MMU_WARN_ON(x) BUILD_BUG_ON_INVALID(x)
 #endif
 
 /* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
@@ -44,6 +36,16 @@ extern bool dbg;
 #define INVALID_PAE_ROOT       0
 #define IS_VALID_PAE_ROOT(x)   (!!(x))
 
+static inline hpa_t kvm_mmu_get_dummy_root(void)
+{
+       return my_zero_pfn(0) << PAGE_SHIFT;
+}
+
+static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
+{
+       return is_zero_pfn(shadow_page >> PAGE_SHIFT);
+}
+
 typedef u64 __rcu *tdp_ptep_t;
 
 struct kvm_mmu_page {
@@ -170,9 +172,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
                                    struct kvm_memory_slot *slot, u64 gfn,
                                    int min_level);
 
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
-                                gfn_t nr_pages);
-
 /* Flush the given page (huge or not) of guest memory. */
 static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
 {
index 0a2ac43..c87da11 100644 (file)
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/lockdep.h>
 #include <linux/kvm_host.h>
 #include <linux/rculist.h>
 
-#include <asm/kvm_page_track.h>
-
 #include "mmu.h"
 #include "mmu_internal.h"
+#include "page_track.h"
 
 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
 {
@@ -28,103 +28,64 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
 {
-       int i;
-
-       for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
-               kvfree(slot->arch.gfn_track[i]);
-               slot->arch.gfn_track[i] = NULL;
-       }
+       kvfree(slot->arch.gfn_write_track);
+       slot->arch.gfn_write_track = NULL;
 }
 
-int kvm_page_track_create_memslot(struct kvm *kvm,
-                                 struct kvm_memory_slot *slot,
-                                 unsigned long npages)
+static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
+                                                unsigned long npages)
 {
-       int i;
-
-       for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
-               if (i == KVM_PAGE_TRACK_WRITE &&
-                   !kvm_page_track_write_tracking_enabled(kvm))
-                       continue;
-
-               slot->arch.gfn_track[i] =
-                       __vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
-                                 GFP_KERNEL_ACCOUNT);
-               if (!slot->arch.gfn_track[i])
-                       goto track_free;
-       }
+       const size_t size = sizeof(*slot->arch.gfn_write_track);
 
-       return 0;
+       if (!slot->arch.gfn_write_track)
+               slot->arch.gfn_write_track = __vcalloc(npages, size,
+                                                      GFP_KERNEL_ACCOUNT);
 
-track_free:
-       kvm_page_track_free_memslot(slot);
-       return -ENOMEM;
+       return slot->arch.gfn_write_track ? 0 : -ENOMEM;
 }
 
-static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
+int kvm_page_track_create_memslot(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot,
+                                 unsigned long npages)
 {
-       if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
-               return false;
+       if (!kvm_page_track_write_tracking_enabled(kvm))
+               return 0;
 
-       return true;
+       return __kvm_page_track_write_tracking_alloc(slot, npages);
 }
 
 int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
 {
-       unsigned short *gfn_track;
-
-       if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
-               return 0;
-
-       gfn_track = __vcalloc(slot->npages, sizeof(*gfn_track),
-                             GFP_KERNEL_ACCOUNT);
-       if (gfn_track == NULL)
-               return -ENOMEM;
-
-       slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
-       return 0;
+       return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
 }
 
-static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
-                            enum kvm_page_track_mode mode, short count)
+static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
+                                  short count)
 {
        int index, val;
 
        index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
 
-       val = slot->arch.gfn_track[mode][index];
+       val = slot->arch.gfn_write_track[index];
 
-       if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
+       if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
                return;
 
-       slot->arch.gfn_track[mode][index] += count;
+       slot->arch.gfn_write_track[index] += count;
 }
 
-/*
- * add guest page to the tracking pool so that corresponding access on that
- * page will be intercepted.
- *
- * It should be called under the protection both of mmu-lock and kvm->srcu
- * or kvm->slots_lock.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
- */
-void kvm_slot_page_track_add_page(struct kvm *kvm,
-                                 struct kvm_memory_slot *slot, gfn_t gfn,
-                                 enum kvm_page_track_mode mode)
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+                              gfn_t gfn)
 {
+       lockdep_assert_held_write(&kvm->mmu_lock);
 
-       if (WARN_ON(!page_track_mode_is_valid(mode)))
-               return;
+       lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
+                           srcu_read_lock_held(&kvm->srcu));
 
-       if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
-                   !kvm_page_track_write_tracking_enabled(kvm)))
+       if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
                return;
 
-       update_gfn_track(slot, gfn, mode, 1);
+       update_gfn_write_track(slot, gfn, 1);
 
        /*
         * new track stops large page mapping for the
@@ -132,37 +93,22 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
         */
        kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
-       if (mode == KVM_PAGE_TRACK_WRITE)
-               if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
-                       kvm_flush_remote_tlbs(kvm);
+       if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
+               kvm_flush_remote_tlbs(kvm);
 }
-EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
 
-/*
- * remove the guest page from the tracking pool which stops the interception
- * of corresponding access on that page. It is the opposed operation of
- * kvm_slot_page_track_add_page().
- *
- * It should be called under the protection both of mmu-lock and kvm->srcu
- * or kvm->slots_lock.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
- */
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
-                                    struct kvm_memory_slot *slot, gfn_t gfn,
-                                    enum kvm_page_track_mode mode)
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot, gfn_t gfn)
 {
-       if (WARN_ON(!page_track_mode_is_valid(mode)))
-               return;
+       lockdep_assert_held_write(&kvm->mmu_lock);
 
-       if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
-                   !kvm_page_track_write_tracking_enabled(kvm)))
+       lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
+                           srcu_read_lock_held(&kvm->srcu));
+
+       if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
                return;
 
-       update_gfn_track(slot, gfn, mode, -1);
+       update_gfn_write_track(slot, gfn, -1);
 
        /*
         * allow large page mapping for the tracked page
@@ -170,31 +116,26 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
         */
        kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
-EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
 
 /*
  * check if the corresponding access on the specified guest page is tracked.
  */
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
-                                  const struct kvm_memory_slot *slot,
-                                  gfn_t gfn, enum kvm_page_track_mode mode)
+bool kvm_gfn_is_write_tracked(struct kvm *kvm,
+                             const struct kvm_memory_slot *slot, gfn_t gfn)
 {
        int index;
 
-       if (WARN_ON(!page_track_mode_is_valid(mode)))
-               return false;
-
        if (!slot)
                return false;
 
-       if (mode == KVM_PAGE_TRACK_WRITE &&
-           !kvm_page_track_write_tracking_enabled(kvm))
+       if (!kvm_page_track_write_tracking_enabled(kvm))
                return false;
 
        index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
-       return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
+       return !!READ_ONCE(slot->arch.gfn_write_track[index]);
 }
 
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
 void kvm_page_track_cleanup(struct kvm *kvm)
 {
        struct kvm_page_track_notifier_head *head;
@@ -216,17 +157,22 @@ int kvm_page_track_init(struct kvm *kvm)
  * register the notifier so that event interception for the tracked guest
  * pages can be received.
  */
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
-                                struct kvm_page_track_notifier_node *n)
+int kvm_page_track_register_notifier(struct kvm *kvm,
+                                    struct kvm_page_track_notifier_node *n)
 {
        struct kvm_page_track_notifier_head *head;
 
+       if (!kvm || kvm->mm != current->mm)
+               return -ESRCH;
+
+       kvm_get_kvm(kvm);
+
        head = &kvm->arch.track_notifier_head;
 
        write_lock(&kvm->mmu_lock);
        hlist_add_head_rcu(&n->node, &head->track_notifier_list);
        write_unlock(&kvm->mmu_lock);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
 
@@ -234,9 +180,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
  * stop receiving the event interception. It is the opposed operation of
  * kvm_page_track_register_notifier().
  */
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
-                                  struct kvm_page_track_notifier_node *n)
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+                                       struct kvm_page_track_notifier_node *n)
 {
        struct kvm_page_track_notifier_head *head;
 
@@ -246,6 +191,8 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
        hlist_del_rcu(&n->node);
        write_unlock(&kvm->mmu_lock);
        synchronize_srcu(&head->track_srcu);
+
+       kvm_put_kvm(kvm);
 }
 EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
 
@@ -256,34 +203,30 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
  * The node should figure out if the written page is the one that node is
  * interested in by itself.
  */
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-                         int bytes)
+void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
 {
        struct kvm_page_track_notifier_head *head;
        struct kvm_page_track_notifier_node *n;
        int idx;
 
-       head = &vcpu->kvm->arch.track_notifier_head;
+       head = &kvm->arch.track_notifier_head;
 
        if (hlist_empty(&head->track_notifier_list))
                return;
 
        idx = srcu_read_lock(&head->track_srcu);
        hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
-                               srcu_read_lock_held(&head->track_srcu))
+                                 srcu_read_lock_held(&head->track_srcu))
                if (n->track_write)
-                       n->track_write(vcpu, gpa, new, bytes, n);
+                       n->track_write(gpa, new, bytes, n);
        srcu_read_unlock(&head->track_srcu, idx);
 }
 
 /*
- * Notify the node that memory slot is being removed or moved so that it can
- * drop write-protection for the pages in the memory slot.
- *
- * The node should figure out it has any write-protected pages in this slot
- * by itself.
+ * Notify external page track nodes that a memory region is being removed from
+ * the VM, e.g. so that users can free any associated metadata.
  */
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
        struct kvm_page_track_notifier_head *head;
        struct kvm_page_track_notifier_node *n;
@@ -296,8 +239,69 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 
        idx = srcu_read_lock(&head->track_srcu);
        hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
-                               srcu_read_lock_held(&head->track_srcu))
-               if (n->track_flush_slot)
-                       n->track_flush_slot(kvm, slot, n);
+                                 srcu_read_lock_held(&head->track_srcu))
+               if (n->track_remove_region)
+                       n->track_remove_region(slot->base_gfn, slot->npages, n);
        srcu_read_unlock(&head->track_srcu, idx);
 }
+
+/*
+ * add guest page to the tracking pool so that corresponding access on that
+ * page will be intercepted.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ */
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
+{
+       struct kvm_memory_slot *slot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+
+       slot = gfn_to_memslot(kvm, gfn);
+       if (!slot) {
+               srcu_read_unlock(&kvm->srcu, idx);
+               return -EINVAL;
+       }
+
+       write_lock(&kvm->mmu_lock);
+       __kvm_write_track_add_gfn(kvm, slot, gfn);
+       write_unlock(&kvm->mmu_lock);
+
+       srcu_read_unlock(&kvm->srcu, idx);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
+
+/*
+ * remove the guest page from the tracking pool which stops the interception
+ * of corresponding access on that page.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ */
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
+{
+       struct kvm_memory_slot *slot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+
+       slot = gfn_to_memslot(kvm, gfn);
+       if (!slot) {
+               srcu_read_unlock(&kvm->srcu, idx);
+               return -EINVAL;
+       }
+
+       write_lock(&kvm->mmu_lock);
+       __kvm_write_track_remove_gfn(kvm, slot, gfn);
+       write_unlock(&kvm->mmu_lock);
+
+       srcu_read_unlock(&kvm->srcu, idx);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
+#endif
diff --git a/arch/x86/kvm/mmu/page_track.h b/arch/x86/kvm/mmu/page_track.h
new file mode 100644 (file)
index 0000000..d4d72ed
--- /dev/null
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_PAGE_TRACK_H
+#define __KVM_X86_PAGE_TRACK_H
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_page_track.h>
+
+
+bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
+int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
+
+void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
+int kvm_page_track_create_memslot(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot,
+                                 unsigned long npages);
+
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+                              gfn_t gfn);
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+                                 struct kvm_memory_slot *slot, gfn_t gfn);
+
+bool kvm_gfn_is_write_tracked(struct kvm *kvm,
+                             const struct kvm_memory_slot *slot, gfn_t gfn);
+
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+int kvm_page_track_init(struct kvm *kvm);
+void kvm_page_track_cleanup(struct kvm *kvm);
+
+void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes);
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
+
+static inline bool kvm_page_track_has_external_user(struct kvm *kvm)
+{
+       return !hlist_empty(&kvm->arch.track_notifier_head.track_notifier_list);
+}
+#else
+static inline int kvm_page_track_init(struct kvm *kvm) { return 0; }
+static inline void kvm_page_track_cleanup(struct kvm *kvm) { }
+
+static inline void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa,
+                                         const u8 *new, int bytes) { }
+static inline void kvm_page_track_delete_slot(struct kvm *kvm,
+                                             struct kvm_memory_slot *slot) { }
+
+static inline bool kvm_page_track_has_external_user(struct kvm *kvm) { return false; }
+
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
+
+static inline void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+                                       const u8 *new, int bytes)
+{
+       __kvm_page_track_write(vcpu->kvm, gpa, new, bytes);
+
+       kvm_mmu_track_write(vcpu, gpa, new, bytes);
+}
+
+#endif /* __KVM_X86_PAGE_TRACK_H */
index 0662e02..c852550 100644 (file)
@@ -338,7 +338,6 @@ retry_walk:
        }
 #endif
        walker->max_level = walker->level;
-       ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
        /*
         * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
@@ -348,9 +347,21 @@ retry_walk:
        nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
 
        pte_access = ~0;
+
+       /*
+        * Queue a page fault for injection if this assertion fails, as callers
+        * assume that walker.fault contains sane info on a walk failure.  I.e.
+        * avoid making the situation worse by inducing even worse badness
+        * between when the assertion fails and when KVM kicks the vCPU out to
+        * userspace (because the VM is bugged).
+        */
+       if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm))
+               goto error;
+
        ++walker->level;
 
        do {
+               struct kvm_memory_slot *slot;
                unsigned long host_addr;
 
                pt_access = pte_access;
@@ -381,7 +392,11 @@ retry_walk:
                if (unlikely(real_gpa == INVALID_GPA))
                        return 0;
 
-               host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
+               slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa));
+               if (!kvm_is_visible_memslot(slot))
+                       goto error;
+
+               host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa),
                                            &walker->pte_writable[walker->level - 1]);
                if (unlikely(kvm_is_error_hva(host_addr)))
                        goto error;
@@ -456,9 +471,6 @@ retry_walk:
                        goto retry_walk;
        }
 
-       pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-                __func__, (u64)pte, walker->pte_access,
-                walker->pt_access[walker->level - 1]);
        return 1;
 
 error:
@@ -529,8 +541,6 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
        if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
                return false;
 
-       pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-
        gfn = gpte_to_gfn(gpte);
        pte_access = sp->role.access & FNAME(gpte_access)(gpte);
        FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
@@ -638,8 +648,19 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
        if (FNAME(gpte_changed)(vcpu, gw, top_level))
                goto out_gpte_changed;
 
-       if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+       if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+               goto out_gpte_changed;
+
+       /*
+        * Load a new root and retry the faulting instruction in the extremely
+        * unlikely scenario that the guest root gfn became visible between
+        * loading a dummy root and handling the resulting page fault, e.g. if
+        * userspace creates a memslot in the interim.
+        */
+       if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
+               kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
                goto out_gpte_changed;
+       }
 
        for_each_shadow_entry(vcpu, fault->addr, it) {
                gfn_t table_gfn;
@@ -758,7 +779,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        struct guest_walker walker;
        int r;
 
-       pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
        WARN_ON_ONCE(fault->is_tdp);
 
        /*
@@ -773,7 +793,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
         * The page is not mapped by the guest.  Let the guest handle it.
         */
        if (!r) {
-               pgprintk("%s: guest page fault\n", __func__);
                if (!fault->prefetch)
                        kvm_inject_emulated_page_fault(vcpu, &walker.fault);
 
@@ -837,7 +856,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
 {
        int offset = 0;
 
-       WARN_ON(sp->role.level != PG_LEVEL_4K);
+       WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
 
        if (PTTYPE == 32)
                offset = sp->role.quadrant << SPTE_LEVEL_BITS;
index cf2c642..4a59913 100644 (file)
@@ -61,7 +61,7 @@ static u64 generation_mmio_spte_mask(u64 gen)
 {
        u64 mask;
 
-       WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+       WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK);
 
        mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
        mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
@@ -221,8 +221,6 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                 * shadow pages and unsync'ing pages is not allowed.
                 */
                if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
-                       pgprintk("%s: found shadow page for %llx, marking ro\n",
-                                __func__, gfn);
                        wrprot = true;
                        pte_access &= ~ACC_WRITE_MASK;
                        spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
@@ -242,7 +240,7 @@ out:
 
        if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
                /* Enforced by kvm_mmu_hugepage_adjust. */
-               WARN_ON(level > PG_LEVEL_4K);
+               WARN_ON_ONCE(level > PG_LEVEL_4K);
                mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
        }
 
index 1279db2..a129951 100644 (file)
@@ -3,6 +3,7 @@
 #ifndef KVM_X86_MMU_SPTE_H
 #define KVM_X86_MMU_SPTE_H
 
+#include "mmu.h"
 #include "mmu_internal.h"
 
 /*
@@ -236,6 +237,18 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
        return to_shadow_page(__pa(sptep));
 }
 
+static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
+{
+       if (kvm_mmu_is_dummy_root(root))
+               return NULL;
+
+       /*
+        * The "root" may be a special root, e.g. a PAE entry, treat it as a
+        * SPTE to ensure any non-PA bits are dropped.
+        */
+       return spte_to_child_sp(root);
+}
+
 static inline bool is_mmio_spte(u64 spte)
 {
        return (spte & shadow_mmio_mask) == shadow_mmio_value &&
@@ -265,13 +278,13 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
 
 static inline bool spte_ad_enabled(u64 spte)
 {
-       MMU_WARN_ON(!is_shadow_present_pte(spte));
+       KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
        return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED;
 }
 
 static inline bool spte_ad_need_write_protect(u64 spte)
 {
-       MMU_WARN_ON(!is_shadow_present_pte(spte));
+       KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
        /*
         * This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0',
         * and non-TDP SPTEs will never set these bits.  Optimize for 64-bit
@@ -282,13 +295,13 @@ static inline bool spte_ad_need_write_protect(u64 spte)
 
 static inline u64 spte_shadow_accessed_mask(u64 spte)
 {
-       MMU_WARN_ON(!is_shadow_present_pte(spte));
+       KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
        return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
 }
 
 static inline u64 spte_shadow_dirty_mask(u64 spte)
 {
-       MMU_WARN_ON(!is_shadow_present_pte(spte));
+       KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
        return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
 }
 
index d2eb0d4..bd30ebf 100644 (file)
@@ -39,13 +39,14 @@ void tdp_iter_restart(struct tdp_iter *iter)
 void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
                    int min_level, gfn_t next_last_level_gfn)
 {
-       int root_level = root->role.level;
-
-       WARN_ON(root_level < 1);
-       WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
+       if (WARN_ON_ONCE(!root || (root->role.level < 1) ||
+                        (root->role.level > PT64_ROOT_MAX_LEVEL))) {
+               iter->valid = false;
+               return;
+       }
 
        iter->next_last_level_gfn = next_last_level_gfn;
-       iter->root_level = root_level;
+       iter->root_level = root->role.level;
        iter->min_level = min_level;
        iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
        iter->as_id = kvm_mmu_page_as_id(root);
index 512163d..6c63f2d 100644 (file)
@@ -475,9 +475,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
        bool is_leaf = is_present && is_last_spte(new_spte, level);
        bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
 
-       WARN_ON(level > PT64_ROOT_MAX_LEVEL);
-       WARN_ON(level < PG_LEVEL_4K);
-       WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+       WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
+       WARN_ON_ONCE(level < PG_LEVEL_4K);
+       WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
 
        /*
         * If this warning were to trigger it would indicate that there was a
@@ -522,9 +522,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
                 * impact the guest since both the former and current SPTEs
                 * are nonpresent.
                 */
-               if (WARN_ON(!is_mmio_spte(old_spte) &&
-                           !is_mmio_spte(new_spte) &&
-                           !is_removed_spte(new_spte)))
+               if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
+                                !is_mmio_spte(new_spte) &&
+                                !is_removed_spte(new_spte)))
                        pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
                               "should not be replaced with another,\n"
                               "different nonpresent SPTE, unless one or both\n"
@@ -661,7 +661,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
         * should be used. If operating under the MMU lock in write mode, the
         * use of the removed SPTE should not be necessary.
         */
-       WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
+       WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));
 
        old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
 
@@ -689,7 +689,7 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
                else
 
 #define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end)                \
-       for_each_tdp_pte(_iter, to_shadow_page(_mmu->root.hpa), _start, _end)
+       for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
 
 /*
  * Yield if the MMU lock is contended or this thread needs to return control
@@ -709,7 +709,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
                                                          struct tdp_iter *iter,
                                                          bool flush, bool shared)
 {
-       WARN_ON(iter->yielded);
+       WARN_ON_ONCE(iter->yielded);
 
        /* Ensure forward progress has been made before yielding. */
        if (iter->next_last_level_gfn == iter->yielded_gfn)
@@ -728,7 +728,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
 
                rcu_read_lock();
 
-               WARN_ON(iter->gfn > iter->next_last_level_gfn);
+               WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
 
                iter->yielded = true;
        }
@@ -1241,7 +1241,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
        u64 new_spte;
 
        /* Huge pages aren't expected to be modified without first being zapped. */
-       WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
+       WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
 
        if (iter->level != PG_LEVEL_4K ||
            !is_shadow_present_pte(iter->old_spte))
@@ -1255,9 +1255,9 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
         */
        tdp_mmu_iter_set_spte(kvm, iter, 0);
 
-       if (!pte_write(range->pte)) {
+       if (!pte_write(range->arg.pte)) {
                new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
-                                                                 pte_pfn(range->pte));
+                                                                 pte_pfn(range->arg.pte));
 
                tdp_mmu_iter_set_spte(kvm, iter, new_spte);
        }
@@ -1548,8 +1548,8 @@ retry:
                if (!is_shadow_present_pte(iter.old_spte))
                        continue;
 
-               MMU_WARN_ON(kvm_ad_enabled() &&
-                           spte_ad_need_write_protect(iter.old_spte));
+               KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+                               spte_ad_need_write_protect(iter.old_spte));
 
                if (!(iter.old_spte & dbit))
                        continue;
@@ -1600,6 +1600,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
                                                   shadow_dirty_mask;
        struct tdp_iter iter;
 
+       lockdep_assert_held_write(&kvm->mmu_lock);
+
        rcu_read_lock();
 
        tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
@@ -1607,8 +1609,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
                if (!mask)
                        break;
 
-               MMU_WARN_ON(kvm_ad_enabled() &&
-                           spte_ad_need_write_protect(iter.old_spte));
+               KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+                               spte_ad_need_write_protect(iter.old_spte));
 
                if (iter.level > PG_LEVEL_4K ||
                    !(mask & (1UL << (iter.gfn - gfn))))
@@ -1646,7 +1648,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 {
        struct kvm_mmu_page *root;
 
-       lockdep_assert_held_write(&kvm->mmu_lock);
        for_each_tdp_mmu_root(kvm, root, slot->as_id)
                clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
 }
index bf653df..edb89b5 100644 (file)
@@ -382,9 +382,6 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
        struct kvm_x86_pmu_event_filter *filter;
        struct kvm *kvm = pmc->vcpu->kvm;
 
-       if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
-               return false;
-
        filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
        if (!filter)
                return true;
@@ -398,6 +395,7 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
 {
        return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
+              static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
               check_pmu_event_filter(pmc);
 }
 
index 56cbdb2..b816506 100644 (file)
@@ -43,6 +43,7 @@ enum kvm_only_cpuid_leafs {
 /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
 #define X86_FEATURE_AVX_VNNI_INT8       KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
 #define X86_FEATURE_AVX_NE_CONVERT      KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
+#define X86_FEATURE_AMX_COMPLEX         KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
 #define X86_FEATURE_PREFETCHITI         KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
 
 /* CPUID level 0x80000007 (EDX). */
index cfc8ab7..2092db8 100644 (file)
@@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
        int ret = 0;
        unsigned long flags;
        struct amd_svm_iommu_ir *ir;
+       u64 entry;
 
        /**
         * In some cases, the existing irte is updated and re-set,
@@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
        ir->data = pi->ir_data;
 
        spin_lock_irqsave(&svm->ir_list_lock, flags);
+
+       /*
+        * Update the target pCPU for IOMMU doorbells if the vCPU is running.
+        * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
+        * will update the pCPU info when the vCPU is awakened and/or scheduled in.
+        * See also avic_vcpu_load().
+        */
+       entry = READ_ONCE(*(svm->avic_physical_id_cache));
+       if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
+               amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
+                                   true, pi->ir_data);
+
        list_add(&ir->node, &svm->ir_list);
        spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 out:
@@ -986,10 +999,11 @@ static inline int
 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
 {
        int ret = 0;
-       unsigned long flags;
        struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       lockdep_assert_held(&svm->ir_list_lock);
+
        if (!kvm_arch_has_assigned_device(vcpu->kvm))
                return 0;
 
@@ -997,19 +1011,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
         * Here, we go through the per-vcpu ir_list to update all existing
         * interrupt remapping table entry targeting this vcpu.
         */
-       spin_lock_irqsave(&svm->ir_list_lock, flags);
-
        if (list_empty(&svm->ir_list))
-               goto out;
+               return 0;
 
        list_for_each_entry(ir, &svm->ir_list, node) {
                ret = amd_iommu_update_ga(cpu, r, ir->data);
                if (ret)
-                       break;
+                       return ret;
        }
-out:
-       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
-       return ret;
+       return 0;
 }
 
 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1017,6 +1027,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        u64 entry;
        int h_physical_id = kvm_cpu_get_apicid(cpu);
        struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long flags;
 
        lockdep_assert_preemption_disabled();
 
@@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        if (kvm_vcpu_is_blocking(vcpu))
                return;
 
+       /*
+        * Grab the per-vCPU interrupt remapping lock even if the VM doesn't
+        * _currently_ have assigned devices, as that can change.  Holding
+        * ir_list_lock ensures that either svm_ir_list_add() will consume
+        * up-to-date entry information, or that this task will wait until
+        * svm_ir_list_add() completes to set the new target pCPU.
+        */
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
        WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
 
@@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
        avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
+
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }
 
 void avic_vcpu_put(struct kvm_vcpu *vcpu)
 {
        u64 entry;
        struct vcpu_svm *svm = to_svm(vcpu);
+       unsigned long flags;
 
        lockdep_assert_preemption_disabled();
 
+       /*
+        * Note, reading the Physical ID entry outside of ir_list_lock is safe
+        * as only the pCPU that has loaded (or is loading) the vCPU is allowed
+        * to modify the entry, and preemption is disabled.  I.e. the vCPU
+        * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
+        * recursively.
+        */
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
 
        /* Nothing to do if IsRunning == '0' due to vCPU blocking. */
        if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
                return;
 
+       /*
+        * Take and hold the per-vCPU interrupt remapping lock while updating
+        * the Physical ID entry even though the lock doesn't protect against
+        * multiple writers (see above).  Holding ir_list_lock ensures that
+        * either svm_ir_list_add() will consume up-to-date entry information,
+        * or that this task will wait until svm_ir_list_add() completes to
+        * mark the vCPU as not running.
+        */
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+
        avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
 
        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+
 }
 
 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
index 96936dd..dd496c9 100644 (file)
@@ -107,7 +107,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
 
 static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
 {
-       if (!svm->v_vmload_vmsave_enabled)
+       if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
                return true;
 
        if (!nested_npt_enabled(svm))
@@ -552,6 +552,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
        bool new_vmcb12 = false;
        struct vmcb *vmcb01 = svm->vmcb01.ptr;
        struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
 
        nested_vmcb02_compute_g_pat(svm);
 
@@ -577,18 +578,18 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
                vmcb_mark_dirty(vmcb02, VMCB_DT);
        }
 
-       kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
+       kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
 
-       svm_set_efer(&svm->vcpu, svm->nested.save.efer);
+       svm_set_efer(vcpu, svm->nested.save.efer);
 
-       svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
-       svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
+       svm_set_cr0(vcpu, svm->nested.save.cr0);
+       svm_set_cr4(vcpu, svm->nested.save.cr4);
 
        svm->vcpu.arch.cr2 = vmcb12->save.cr2;
 
-       kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
-       kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
-       kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
+       kvm_rax_write(vcpu, vmcb12->save.rax);
+       kvm_rsp_write(vcpu, vmcb12->save.rsp);
+       kvm_rip_write(vcpu, vmcb12->save.rip);
 
        /* In case we don't even reach vcpu_run, the fields are not updated */
        vmcb02->save.rax = vmcb12->save.rax;
@@ -602,7 +603,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
                vmcb_mark_dirty(vmcb02, VMCB_DR);
        }
 
-       if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+       if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+                    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
                /*
                 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
                 * svm_set_msr's definition of reserved bits.
@@ -658,7 +660,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
         * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
         */
 
-       if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
+       if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
+           (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
                int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
        else
                int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
@@ -695,10 +698,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 
        vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
 
-       if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
-               WARN_ON(!svm->tsc_scaling_enabled);
+       if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+           svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
                nested_svm_update_tsc_ratio_msr(vcpu);
-       }
 
        vmcb02->control.int_ctl             =
                (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
@@ -717,7 +719,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
         * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
         * prior to injecting the event).
         */
-       if (svm->nrips_enabled)
+       if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
                vmcb02->control.next_rip    = svm->nested.ctl.next_rip;
        else if (boot_cpu_has(X86_FEATURE_NRIPS))
                vmcb02->control.next_rip    = vmcb12_rip;
@@ -727,7 +729,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
                svm->soft_int_injected = true;
                svm->soft_int_csbase = vmcb12_csbase;
                svm->soft_int_old_rip = vmcb12_rip;
-               if (svm->nrips_enabled)
+               if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
                        svm->soft_int_next_rip = svm->nested.ctl.next_rip;
                else
                        svm->soft_int_next_rip = vmcb12_rip;
@@ -735,15 +737,21 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
 
        vmcb02->control.virt_ext            = vmcb01->control.virt_ext &
                                              LBR_CTL_ENABLE_MASK;
-       if (svm->lbrv_enabled)
+       if (guest_can_use(vcpu, X86_FEATURE_LBRV))
                vmcb02->control.virt_ext  |=
                        (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
 
        if (!nested_vmcb_needs_vls_intercept(svm))
                vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
 
-       pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
-       pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
+       if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
+               pause_count12 = svm->nested.ctl.pause_filter_count;
+       else
+               pause_count12 = 0;
+       if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
+               pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
+       else
+               pause_thresh12 = 0;
        if (kvm_pause_in_guest(svm->vcpu.kvm)) {
                /* use guest values since host doesn't intercept PAUSE */
                vmcb02->control.pause_filter_count = pause_count12;
@@ -1027,7 +1035,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        if (vmcb12->control.exit_code != SVM_EXIT_ERR)
                nested_save_pending_event_to_vmcb12(svm, vmcb12);
 
-       if (svm->nrips_enabled)
+       if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
                vmcb12->control.next_rip  = vmcb02->control.next_rip;
 
        vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
@@ -1066,7 +1074,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        if (!nested_exit_on_intr(svm))
                kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 
-       if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+       if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+                    (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
                svm_copy_lbrs(vmcb12, vmcb02);
                svm_update_lbrv(vcpu);
        } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
@@ -1101,10 +1110,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
                vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
        }
 
-       if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
-               WARN_ON(!svm->tsc_scaling_enabled);
+       if (kvm_caps.has_tsc_control &&
+           vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
                vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
-               __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+               svm_write_tsc_multiplier(vcpu);
        }
 
        svm->nested.ctl.nested_cr3 = 0;
@@ -1537,7 +1546,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
        vcpu->arch.tsc_scaling_ratio =
                kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
                                               svm->tsc_ratio_msr);
-       __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+       svm_write_tsc_multiplier(vcpu);
 }
 
 /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
index d3aec1f..b9a0a93 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/pkru.h>
 #include <asm/trapnr.h>
 #include <asm/fpu/xcr.h>
+#include <asm/debugreg.h>
 
 #include "mmu.h"
 #include "x86.h"
@@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444);
 /* enable/disable SEV-ES support */
 static bool sev_es_enabled = true;
 module_param_named(sev_es, sev_es_enabled, bool, 0444);
+
+/* enable/disable SEV-ES DebugSwap support */
+static bool sev_es_debug_swap_enabled = true;
+module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
 #else
 #define sev_enabled false
 #define sev_es_enabled false
+#define sev_es_debug_swap_enabled false
 #endif /* CONFIG_KVM_AMD_SEV */
 
 static u8 sev_enc_bit;
@@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
        save->xss  = svm->vcpu.arch.ia32_xss;
        save->dr6  = svm->vcpu.arch.dr6;
 
+       if (sev_es_debug_swap_enabled)
+               save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+
        pr_debug("Virtual Machine Save Area (VMSA):\n");
        print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
 
@@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
        struct vcpu_svm *svm = to_svm(vcpu);
        int ret;
 
+       if (vcpu->guest_debug) {
+               pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
+               return -EINVAL;
+       }
+
        /* Perform some pre-encryption checks against the VMSA */
        ret = sev_es_sync_vmsa(svm);
        if (ret)
@@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm)
                 * Note, the source is not required to have the same number of
                 * vCPUs as the destination when migrating a vanilla SEV VM.
                 */
-               src_vcpu = kvm_get_vcpu(dst_kvm, i);
+               src_vcpu = kvm_get_vcpu(src_kvm, i);
                src_svm = to_svm(src_vcpu);
 
                /*
@@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void)
        bool sev_es_supported = false;
        bool sev_supported = false;
 
-       if (!sev_enabled || !npt_enabled)
+       if (!sev_enabled || !npt_enabled || !nrips)
                goto out;
 
        /*
@@ -2256,6 +2270,9 @@ out:
 
        sev_enabled = sev_supported;
        sev_es_enabled = sev_es_supported;
+       if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
+           !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
+               sev_es_debug_swap_enabled = false;
 #endif
 }
 
@@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                                            svm->sev_es.ghcb_sa);
                break;
        case SVM_VMGEXIT_NMI_COMPLETE:
-               ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
+               ++vcpu->stat.nmi_window_exits;
+               svm->nmi_masked = false;
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+               ret = 1;
                break;
        case SVM_VMGEXIT_AP_HLT_LOOP:
                ret = kvm_emulate_ap_reset_hold(vcpu);
@@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 
 static void sev_es_init_vmcb(struct vcpu_svm *svm)
 {
+       struct vmcb *vmcb = svm->vmcb01.ptr;
        struct kvm_vcpu *vcpu = &svm->vcpu;
 
        svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
@@ -2952,9 +2973,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
        /*
         * An SEV-ES guest requires a VMSA area that is a separate from the
         * VMCB page. Do not include the encryption mask on the VMSA physical
-        * address since hardware will access it using the guest key.
+        * address since hardware will access it using the guest key.  Note,
+        * the VMSA will be NULL if this vCPU is the destination for intrahost
+        * migration, and will be copied later.
         */
-       svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+       if (svm->sev_es.vmsa)
+               svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
 
        /* Can't intercept CR register access, HV can't modify CR registers */
        svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
        svm_set_intercept(svm, TRAP_CR4_WRITE);
        svm_set_intercept(svm, TRAP_CR8_WRITE);
 
-       /* No support for enable_vmware_backdoor */
-       clr_exception_intercept(svm, GP_VECTOR);
+       vmcb->control.intercepts[INTERCEPT_DR] = 0;
+       if (!sev_es_debug_swap_enabled) {
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+               recalc_intercepts(svm);
+       } else {
+               /*
+                * Disable #DB intercept iff DebugSwap is enabled.  KVM doesn't
+                * allow debugging SEV-ES guests, and enables DebugSwap iff
+                * NO_NESTED_DATA_BP is supported, so there's no reason to
+                * intercept #DB when DebugSwap is enabled.  For simplicity
+                * with respect to guest debug, intercept #DB for other VMs
+                * even if NO_NESTED_DATA_BP is supported, i.e. even if the
+                * guest can't DoS the CPU with infinite #DB vectoring.
+                */
+               clr_exception_intercept(svm, DB_VECTOR);
+       }
 
        /* Can't intercept XSETBV, HV can't modify XCR0 directly */
        svm_clr_intercept(svm, INTERCEPT_XSETBV);
@@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm)
        svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
        clr_exception_intercept(svm, UD_VECTOR);
 
+       /*
+        * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
+        * KVM can't decrypt guest memory to decode the faulting instruction.
+        */
+       clr_exception_intercept(svm, GP_VECTOR);
+
        if (sev_es_guest(svm->vcpu.kvm))
                sev_es_init_vmcb(svm);
 }
@@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
 void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
 {
        /*
-        * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
-        * of which one step is to perform a VMLOAD.  KVM performs the
-        * corresponding VMSAVE in svm_prepare_guest_switch for both
-        * traditional and SEV-ES guests.
+        * All host state for SEV-ES guests is categorized into three swap types
+        * based on how it is handled by hardware during a world switch:
+        *
+        * A: VMRUN:   Host state saved in host save area
+        *    VMEXIT:  Host state loaded from host save area
+        *
+        * B: VMRUN:   Host state _NOT_ saved in host save area
+        *    VMEXIT:  Host state loaded from host save area
+        *
+        * C: VMRUN:   Host state _NOT_ saved in host save area
+        *    VMEXIT:  Host state initialized to default(reset) values
+        *
+        * Manually save type-B state, i.e. state that is loaded by VMEXIT but
+        * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
+        * by common SVM code).
         */
-
-       /* XCR0 is restored on VMEXIT, save the current host value */
        hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-
-       /* PKRU is restored on VMEXIT, save the current host value */
        hostsa->pkru = read_pkru();
-
-       /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
        hostsa->xss = host_xss;
+
+       /*
+        * If DebugSwap is enabled, debug registers are loaded but NOT saved by
+        * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
+        * saves and loads debug registers (Type-A).
+        */
+       if (sev_es_debug_swap_enabled) {
+               hostsa->dr0 = native_get_debugreg(0);
+               hostsa->dr1 = native_get_debugreg(1);
+               hostsa->dr2 = native_get_debugreg(2);
+               hostsa->dr3 = native_get_debugreg(3);
+               hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
+               hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
+               hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
+               hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
+       }
 }
 
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
index d4bfdc6..f283eb4 100644 (file)
 #include <asm/spec-ctrl.h>
 #include <asm/cpu_device_id.h>
 #include <asm/traps.h>
+#include <asm/reboot.h>
 #include <asm/fpu/api.h>
 
-#include <asm/virtext.h>
-
 #include <trace/events/ipi.h>
 
 #include "trace.h"
@@ -203,7 +202,7 @@ static int nested = true;
 module_param(nested, int, S_IRUGO);
 
 /* enable/disable Next RIP Save */
-static int nrips = true;
+int nrips = true;
 module_param(nrips, int, 0444);
 
 /* enable/disable Virtual VMLOAD VMSAVE */
@@ -365,6 +364,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
                svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
 
 }
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+                                       void *insn, int insn_len);
 
 static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
                                           bool commit_side_effects)
@@ -385,6 +386,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
        }
 
        if (!svm->next_rip) {
+               /*
+                * FIXME: Drop this when kvm_emulate_instruction() does the
+                * right thing and treats "can't emulate" as outright failure
+                * for EMULTYPE_SKIP.
+                */
+               if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
+                       return 0;
+
                if (unlikely(!commit_side_effects))
                        old_rflags = svm->vmcb->save.rflags;
 
@@ -517,14 +526,21 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu)
                vcpu->arch.osvw.status |= 1;
 }
 
-static bool kvm_is_svm_supported(void)
+static bool __kvm_is_svm_supported(void)
 {
-       int cpu = raw_smp_processor_id();
-       const char *msg;
+       int cpu = smp_processor_id();
+       struct cpuinfo_x86 *c = &cpu_data(cpu);
+
        u64 vm_cr;
 
-       if (!cpu_has_svm(&msg)) {
-               pr_err("SVM not supported by CPU %d, %s\n", cpu, msg);
+       if (c->x86_vendor != X86_VENDOR_AMD &&
+           c->x86_vendor != X86_VENDOR_HYGON) {
+               pr_err("CPU %d isn't AMD or Hygon\n", cpu);
+               return false;
+       }
+
+       if (!cpu_has(c, X86_FEATURE_SVM)) {
+               pr_err("SVM not supported by CPU %d\n", cpu);
                return false;
        }
 
@@ -542,25 +558,55 @@ static bool kvm_is_svm_supported(void)
        return true;
 }
 
+static bool kvm_is_svm_supported(void)
+{
+       bool supported;
+
+       migrate_disable();
+       supported = __kvm_is_svm_supported();
+       migrate_enable();
+
+       return supported;
+}
+
 static int svm_check_processor_compat(void)
 {
-       if (!kvm_is_svm_supported())
+       if (!__kvm_is_svm_supported())
                return -EIO;
 
        return 0;
 }
 
-void __svm_write_tsc_multiplier(u64 multiplier)
+static void __svm_write_tsc_multiplier(u64 multiplier)
 {
-       preempt_disable();
-
        if (multiplier == __this_cpu_read(current_tsc_ratio))
-               goto out;
+               return;
 
        wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
        __this_cpu_write(current_tsc_ratio, multiplier);
-out:
-       preempt_enable();
+}
+
+static inline void kvm_cpu_svm_disable(void)
+{
+       uint64_t efer;
+
+       wrmsrl(MSR_VM_HSAVE_PA, 0);
+       rdmsrl(MSR_EFER, efer);
+       if (efer & EFER_SVME) {
+               /*
+                * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
+                * NMI aren't blocked.
+                */
+               stgi();
+               wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+       }
+}
+
+static void svm_emergency_disable(void)
+{
+       kvm_rebooting = true;
+
+       kvm_cpu_svm_disable();
 }
 
 static void svm_hardware_disable(void)
@@ -569,7 +615,7 @@ static void svm_hardware_disable(void)
        if (tsc_scaling)
                __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
 
-       cpu_svm_disable();
+       kvm_cpu_svm_disable();
 
        amd_pmu_disable_virt();
 }
@@ -677,6 +723,39 @@ free_save_area:
 
 }
 
+static void set_dr_intercepts(struct vcpu_svm *svm)
+{
+       struct vmcb *vmcb = svm->vmcb01.ptr;
+
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+
+       recalc_intercepts(svm);
+}
+
+static void clr_dr_intercepts(struct vcpu_svm *svm)
+{
+       struct vmcb *vmcb = svm->vmcb01.ptr;
+
+       vmcb->control.intercepts[INTERCEPT_DR] = 0;
+
+       recalc_intercepts(svm);
+}
+
 static int direct_access_msr_slot(u32 msr)
 {
        u32 i;
@@ -947,50 +1026,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
                svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
 }
 
-static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
+static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
 {
        /*
-        * If the LBR virtualization is disabled, the LBR msrs are always
-        * kept in the vmcb01 to avoid copying them on nested guest entries.
-        *
-        * If nested, and the LBR virtualization is enabled/disabled, the msrs
-        * are moved between the vmcb01 and vmcb02 as needed.
+        * If LBR virtualization is disabled, the LBR MSRs are always kept in
+        * vmcb01.  If LBR virtualization is enabled and L1 is running VMs of
+        * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
         */
-       struct vmcb *vmcb =
-               (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
-                       svm->vmcb : svm->vmcb01.ptr;
-
-       switch (index) {
-       case MSR_IA32_DEBUGCTLMSR:
-               return vmcb->save.dbgctl;
-       case MSR_IA32_LASTBRANCHFROMIP:
-               return vmcb->save.br_from;
-       case MSR_IA32_LASTBRANCHTOIP:
-               return vmcb->save.br_to;
-       case MSR_IA32_LASTINTFROMIP:
-               return vmcb->save.last_excp_from;
-       case MSR_IA32_LASTINTTOIP:
-               return vmcb->save.last_excp_to;
-       default:
-               KVM_BUG(false, svm->vcpu.kvm,
-                       "%s: Unknown MSR 0x%x", __func__, index);
-               return 0;
-       }
+       return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
+                                                                  svm->vmcb01.ptr;
 }
 
 void svm_update_lbrv(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-
-       bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
-                                          DEBUGCTLMSR_LBR;
-
-       bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
-                                     LBR_CTL_ENABLE_MASK);
-
-       if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
-               if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
-                       enable_lbrv = true;
+       bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
+       bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
+                           (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+                           (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
 
        if (enable_lbrv == current_enable_lbrv)
                return;
@@ -1101,21 +1154,23 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
        return svm->tsc_ratio_msr;
 }
 
-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
-       svm->vmcb->control.tsc_offset = offset;
+       svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
        vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
 {
-       __svm_write_tsc_multiplier(multiplier);
+       preempt_disable();
+       if (to_svm(vcpu)->guest_state_loaded)
+               __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+       preempt_enable();
 }
 
-
 /* Evaluate instruction intercepts that depend on guest CPUID features. */
 static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
                                              struct vcpu_svm *svm)
@@ -1156,8 +1211,6 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
 
                set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
                set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
-
-               svm->v_vmload_vmsave_enabled = false;
        } else {
                /*
                 * If hardware supports Virtual VMLOAD VMSAVE then enable it
@@ -1201,10 +1254,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
         * Guest access to VMware backdoor ports could legitimately
         * trigger #GP because of TSS I/O permission bitmap.
         * We intercept those #GP and allow access to them anyway
-        * as VMware does.  Don't intercept #GP for SEV guests as KVM can't
-        * decrypt guest memory to decode the faulting instruction.
+        * as VMware does.
         */
-       if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
+       if (enable_vmware_backdoor)
                set_exception_intercept(svm, GP_VECTOR);
 
        svm_set_intercept(svm, INTERCEPT_INTR);
@@ -1949,7 +2001,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       if (vcpu->arch.guest_state_protected)
+       if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
                return;
 
        get_debugreg(vcpu->arch.db[0], 0);
@@ -2510,12 +2562,13 @@ static int iret_interception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
+       WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
+
        ++vcpu->stat.nmi_window_exits;
        svm->awaiting_iret_completion = true;
 
        svm_clr_iret_intercept(svm);
-       if (!sev_es_guest(vcpu->kvm))
-               svm->nmi_iret_rip = kvm_rip_read(vcpu);
+       svm->nmi_iret_rip = kvm_rip_read(vcpu);
 
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        return 1;
@@ -2680,6 +2733,13 @@ static int dr_interception(struct kvm_vcpu *vcpu)
        unsigned long val;
        int err = 0;
 
+       /*
+        * SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
+        * for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return 1;
+
        if (vcpu->guest_debug == 0) {
                /*
                 * No more DR vmexits; force a reload of the debug registers
@@ -2764,7 +2824,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
        switch (msr_info->index) {
        case MSR_AMD64_TSC_RATIO:
-               if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
+               if (!msr_info->host_initiated &&
+                   !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
                        return 1;
                msr_info->data = svm->tsc_ratio_msr;
                break;
@@ -2802,11 +2863,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = svm->tsc_aux;
                break;
        case MSR_IA32_DEBUGCTLMSR:
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
+               break;
        case MSR_IA32_LASTBRANCHFROMIP:
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
+               break;
        case MSR_IA32_LASTBRANCHTOIP:
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
+               break;
        case MSR_IA32_LASTINTFROMIP:
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
+               break;
        case MSR_IA32_LASTINTTOIP:
-               msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
+               msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
                break;
        case MSR_VM_HSAVE_PA:
                msr_info->data = svm->nested.hsave_msr;
@@ -2906,7 +2975,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
        switch (ecx) {
        case MSR_AMD64_TSC_RATIO:
 
-               if (!svm->tsc_scaling_enabled) {
+               if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
 
                        if (!msr->host_initiated)
                                return 1;
@@ -2928,7 +2997,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
                svm->tsc_ratio_msr = data;
 
-               if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
+               if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+                   is_guest_mode(vcpu))
                        nested_svm_update_tsc_ratio_msr(vcpu);
 
                break;
@@ -3037,13 +3107,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                if (data & DEBUGCTL_RESERVED_BITS)
                        return 1;
 
-               if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
-                       svm->vmcb->save.dbgctl = data;
-               else
-                       svm->vmcb01.ptr->save.dbgctl = data;
-
+               svm_get_lbr_vmcb(svm)->save.dbgctl = data;
                svm_update_lbrv(vcpu);
-
                break;
        case MSR_VM_HSAVE_PA:
                /*
@@ -3769,6 +3834,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
        if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
                return; /* IRET will cause a vm exit */
 
+       /*
+        * SEV-ES guests are responsible for signaling when a vCPU is ready to
+        * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
+        * KVM can't intercept and single-step IRET to detect when NMIs are
+        * unblocked (architecturally speaking).  See SVM_VMGEXIT_NMI_COMPLETE.
+        *
+        * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
+        * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
+        * supported NAEs in the GHCB protocol.
+        */
+       if (sev_es_guest(vcpu->kvm))
+               return;
+
        if (!gif_set(svm)) {
                if (vgif)
                        svm_set_intercept(svm, INTERCEPT_STGI);
@@ -3918,12 +3996,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu)
        svm->soft_int_injected = false;
 
        /*
-        * If we've made progress since setting HF_IRET_MASK, we've
+        * If we've made progress since setting awaiting_iret_completion, we've
         * executed an IRET and can allow NMI injection.
         */
        if (svm->awaiting_iret_completion &&
-           (sev_es_guest(vcpu->kvm) ||
-            kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
+           kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
                svm->awaiting_iret_completion = false;
                svm->nmi_masked = false;
                kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -4209,28 +4286,37 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct kvm_cpuid_entry2 *best;
 
-       vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-                                   boot_cpu_has(X86_FEATURE_XSAVE) &&
-                                   boot_cpu_has(X86_FEATURE_XSAVES);
-
-       /* Update nrips enabled cache */
-       svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
-                            guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
-
-       svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
-       svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
-
-       svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
-
-       svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
+       /*
+        * SVM doesn't provide a way to disable just XSAVES in the guest, KVM
+        * can only disable all variants of XSAVE by disallowing CR4.OSXSAVE from
+        * being set.  As a result, if the host has XSAVE and XSAVES, and the
+        * guest has XSAVE enabled, the guest can execute XSAVES without
+        * faulting.  Treat XSAVES as enabled in this case regardless of
+        * whether it's advertised to the guest so that KVM context switches
+        * XSS on VM-Enter/VM-Exit.  Failure to do so would effectively give
+        * the guest read/write access to the host's XSS.
+        */
+       if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+           boot_cpu_has(X86_FEATURE_XSAVES) &&
+           guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+               kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
 
-       svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
 
-       svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
+       /*
+        * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
+        * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
+        * SVM on Intel is bonkers and extremely unlikely to work).
+        */
+       if (!guest_cpuid_is_intel(vcpu))
+               kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
 
-       svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
 
        svm_recalc_instruction_intercepts(vcpu, svm);
 
@@ -4651,16 +4737,25 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
         * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
         * decode garbage.
         *
-        * Inject #UD if KVM reached this point without an instruction buffer.
-        * In practice, this path should never be hit by a well-behaved guest,
-        * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
-        * is still theoretically reachable, e.g. via unaccelerated fault-like
-        * AVIC access, and needs to be handled by KVM to avoid putting the
-        * guest into an infinite loop.   Injecting #UD is somewhat arbitrary,
-        * but its the least awful option given lack of insight into the guest.
+        * If KVM is NOT trying to simply skip an instruction, inject #UD if
+        * KVM reached this point without an instruction buffer.  In practice,
+        * this path should never be hit by a well-behaved guest, e.g. KVM
+        * doesn't intercept #UD or #GP for SEV guests, but this path is still
+        * theoretically reachable, e.g. via unaccelerated fault-like AVIC
+        * access, and needs to be handled by KVM to avoid putting the guest
+        * into an infinite loop.   Injecting #UD is somewhat arbitrary, but
+        * it's the least awful option given lack of insight into the guest.
+        *
+        * If KVM is trying to skip an instruction, simply resume the guest.
+        * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
+        * will attempt to re-inject the INT3/INTO and skip the instruction.
+        * In that scenario, retrying the INT3/INTO and hoping the guest will
+        * make forward progress is the only option that has a chance of
+        * success (and in practice it will work the vast majority of the time).
         */
        if (unlikely(!insn)) {
-               kvm_queue_exception(vcpu, UD_VECTOR);
+               if (!(emul_type & EMULTYPE_SKIP))
+                       kvm_queue_exception(vcpu, UD_VECTOR);
                return false;
        }
 
@@ -5112,9 +5207,11 @@ static __init int svm_hardware_setup(void)
 
        svm_adjust_mmio_mask();
 
+       nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
+
        /*
         * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
-        * may be modified by svm_adjust_mmio_mask()).
+        * may be modified by svm_adjust_mmio_mask()), as well as nrips.
         */
        sev_hardware_setup();
 
@@ -5126,11 +5223,6 @@ static __init int svm_hardware_setup(void)
                        goto err;
        }
 
-       if (nrips) {
-               if (!boot_cpu_has(X86_FEATURE_NRIPS))
-                       nrips = false;
-       }
-
        enable_apicv = avic = avic && avic_hardware_setup();
 
        if (!enable_apicv) {
@@ -5213,6 +5305,13 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = {
        .pmu_ops = &amd_pmu_ops,
 };
 
+static void __svm_exit(void)
+{
+       kvm_x86_vendor_exit();
+
+       cpu_emergency_unregister_virt_callback(svm_emergency_disable);
+}
+
 static int __init svm_init(void)
 {
        int r;
@@ -5226,6 +5325,8 @@ static int __init svm_init(void)
        if (r)
                return r;
 
+       cpu_emergency_register_virt_callback(svm_emergency_disable);
+
        /*
         * Common KVM initialization _must_ come last, after this, /dev/kvm is
         * exposed to userspace!
@@ -5238,14 +5339,14 @@ static int __init svm_init(void)
        return 0;
 
 err_kvm_init:
-       kvm_x86_vendor_exit();
+       __svm_exit();
        return r;
 }
 
 static void __exit svm_exit(void)
 {
        kvm_exit();
-       kvm_x86_vendor_exit();
+       __svm_exit();
 }
 
 module_init(svm_init)
index 8239c8d..f412539 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/svm.h>
 #include <asm/sev-common.h>
 
+#include "cpuid.h"
 #include "kvm_cache_regs.h"
 
 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
@@ -33,6 +34,7 @@
 #define MSRPM_OFFSETS  32
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
+extern int nrips;
 extern int vgif;
 extern bool intercept_smi;
 extern bool x2avic_enabled;
@@ -260,16 +262,6 @@ struct vcpu_svm {
        unsigned long soft_int_next_rip;
        bool soft_int_injected;
 
-       /* optional nested SVM features that are enabled for this guest  */
-       bool nrips_enabled                : 1;
-       bool tsc_scaling_enabled          : 1;
-       bool v_vmload_vmsave_enabled      : 1;
-       bool lbrv_enabled                 : 1;
-       bool pause_filter_enabled         : 1;
-       bool pause_threshold_enabled      : 1;
-       bool vgif_enabled                 : 1;
-       bool vnmi_enabled                 : 1;
-
        u32 ldr_reg;
        u32 dfr_reg;
        struct page *avic_backing_page;
@@ -406,48 +398,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3
        return test_bit(bit, (unsigned long *)&control->intercepts);
 }
 
-static inline void set_dr_intercepts(struct vcpu_svm *svm)
-{
-       struct vmcb *vmcb = svm->vmcb01.ptr;
-
-       if (!sev_es_guest(svm->vcpu.kvm)) {
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
-       }
-
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-       vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
-
-       recalc_intercepts(svm);
-}
-
-static inline void clr_dr_intercepts(struct vcpu_svm *svm)
-{
-       struct vmcb *vmcb = svm->vmcb01.ptr;
-
-       vmcb->control.intercepts[INTERCEPT_DR] = 0;
-
-       /* DR7 access must remain intercepted for an SEV-ES guest */
-       if (sev_es_guest(svm->vcpu.kvm)) {
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-               vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
-       }
-
-       recalc_intercepts(svm);
-}
-
 static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
 {
        struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -493,7 +443,8 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
 
 static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
 {
-       return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
+       return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
+              (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
 }
 
 static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
@@ -544,7 +495,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
 
 static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
 {
-       return svm->vnmi_enabled &&
+       return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
               (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
 }
 
@@ -660,7 +611,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                               bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
-void __svm_write_tsc_multiplier(u64 multiplier);
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
                                       struct vmcb_control_area *control);
 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
index d0abee3..41a4533 100644 (file)
@@ -252,7 +252,7 @@ static inline bool cpu_has_vmx_pml(void)
 static inline bool cpu_has_vmx_xsaves(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
-               SECONDARY_EXEC_XSAVES;
+               SECONDARY_EXEC_ENABLE_XSAVES;
 }
 
 static inline bool cpu_has_vmx_waitpkg(void)
index 79450e1..313b8bb 100644 (file)
@@ -78,7 +78,7 @@
         SECONDARY_EXEC_DESC |                                          \
         SECONDARY_EXEC_ENABLE_RDTSCP |                                 \
         SECONDARY_EXEC_ENABLE_INVPCID |                                \
-        SECONDARY_EXEC_XSAVES |                                        \
+        SECONDARY_EXEC_ENABLE_XSAVES |                                 \
         SECONDARY_EXEC_RDSEED_EXITING |                                \
         SECONDARY_EXEC_RDRAND_EXITING |                                \
         SECONDARY_EXEC_TSC_SCALING |                                   \
index 516391c..c5ec0ef 100644 (file)
@@ -2307,7 +2307,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
                                  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                                  SECONDARY_EXEC_ENABLE_INVPCID |
                                  SECONDARY_EXEC_ENABLE_RDTSCP |
-                                 SECONDARY_EXEC_XSAVES |
+                                 SECONDARY_EXEC_ENABLE_XSAVES |
                                  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -6331,7 +6331,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
                 * If it were, XSS would have to be checked against
                 * the XSS exit bitmap in vmcs12.
                 */
-               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
        case EXIT_REASON_UMWAIT:
        case EXIT_REASON_TPAUSE:
                return nested_cpu_has2(vmcs12,
@@ -6426,7 +6426,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
        vmx = to_vmx(vcpu);
        vmcs12 = get_vmcs12(vcpu);
 
-       if (nested_vmx_allowed(vcpu) &&
+       if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
            (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
                kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
                kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
@@ -6567,7 +6567,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
                if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
                        return -EINVAL;
        } else {
-               if (!nested_vmx_allowed(vcpu))
+               if (!guest_can_use(vcpu, X86_FEATURE_VMX))
                        return -EINVAL;
 
                if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
@@ -6601,7 +6601,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
-               (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
+           (!guest_can_use(vcpu, X86_FEATURE_VMX) ||
+            !vmx->nested.enlightened_vmcs_enabled))
                        return -EINVAL;
 
        vmx_leave_nested(vcpu);
@@ -6874,7 +6875,7 @@ static void nested_vmx_setup_secondary_ctls(u32 ept_caps,
                SECONDARY_EXEC_ENABLE_INVPCID |
                SECONDARY_EXEC_ENABLE_VMFUNC |
                SECONDARY_EXEC_RDSEED_EXITING |
-               SECONDARY_EXEC_XSAVES |
+               SECONDARY_EXEC_ENABLE_XSAVES |
                SECONDARY_EXEC_TSC_SCALING |
                SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
 
index 9695226..b4b9d51 100644 (file)
@@ -168,7 +168,7 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 
 static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
 {
-       return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+       return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
 }
 
 static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
index 80c769c..f2efa0b 100644 (file)
 
 #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
 
+enum intel_pmu_architectural_events {
+       /*
+        * The order of the architectural events matters as support for each
+        * event is enumerated via CPUID using the index of the event.
+        */
+       INTEL_ARCH_CPU_CYCLES,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED,
+       INTEL_ARCH_REFERENCE_CYCLES,
+       INTEL_ARCH_LLC_REFERENCES,
+       INTEL_ARCH_LLC_MISSES,
+       INTEL_ARCH_BRANCHES_RETIRED,
+       INTEL_ARCH_BRANCHES_MISPREDICTED,
+
+       NR_REAL_INTEL_ARCH_EVENTS,
+
+       /*
+        * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
+        * TSC reference cycles.  The architectural reference cycles event may
+        * or may not actually use the TSC as the reference, e.g. might use the
+        * core crystal clock or the bus clock (yeah, "architectural").
+        */
+       PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
+       NR_INTEL_ARCH_EVENTS,
+};
+
 static struct {
        u8 eventsel;
        u8 unit_mask;
 } const intel_arch_events[] = {
-       [0] = { 0x3c, 0x00 },
-       [1] = { 0xc0, 0x00 },
-       [2] = { 0x3c, 0x01 },
-       [3] = { 0x2e, 0x4f },
-       [4] = { 0x2e, 0x41 },
-       [5] = { 0xc4, 0x00 },
-       [6] = { 0xc5, 0x00 },
-       /* The above index must match CPUID 0x0A.EBX bit vector */
-       [7] = { 0x00, 0x03 },
+       [INTEL_ARCH_CPU_CYCLES]                 = { 0x3c, 0x00 },
+       [INTEL_ARCH_INSTRUCTIONS_RETIRED]       = { 0xc0, 0x00 },
+       [INTEL_ARCH_REFERENCE_CYCLES]           = { 0x3c, 0x01 },
+       [INTEL_ARCH_LLC_REFERENCES]             = { 0x2e, 0x4f },
+       [INTEL_ARCH_LLC_MISSES]                 = { 0x2e, 0x41 },
+       [INTEL_ARCH_BRANCHES_RETIRED]           = { 0xc4, 0x00 },
+       [INTEL_ARCH_BRANCHES_MISPREDICTED]      = { 0xc5, 0x00 },
+       [PSEUDO_ARCH_REFERENCE_CYCLES]          = { 0x00, 0x03 },
 };
 
 /* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {
+       [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
+       [1] = INTEL_ARCH_CPU_CYCLES,
+       [2] = PSEUDO_ARCH_REFERENCE_CYCLES,
+};
 
 static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 {
@@ -80,16 +108,18 @@ static bool intel_hw_event_available(struct kvm_pmc *pmc)
        u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+       BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
+
+       /*
+        * Disallow events reported as unavailable in guest CPUID.  Note, this
+        * doesn't apply to pseudo-architectural events.
+        */
+       for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
                if (intel_arch_events[i].eventsel != event_select ||
                    intel_arch_events[i].unit_mask != unit_mask)
                        continue;
 
-               /* disable event that reported as not present by cpuid */
-               if ((i < 7) && !(pmu->available_event_types & (1 << i)))
-                       return false;
-
-               break;
+               return pmu->available_event_types & BIT(i);
        }
 
        return true;
@@ -438,16 +468,17 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
 {
-       size_t size = ARRAY_SIZE(fixed_pmc_events);
-       struct kvm_pmc *pmc;
-       u32 event;
        int i;
 
+       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
+
        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
-               pmc = &pmu->fixed_counters[i];
-               event = fixed_pmc_events[array_index_nospec(i, size)];
+               int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
+               struct kvm_pmc *pmc = &pmu->fixed_counters[index];
+               u32 event = fixed_pmc_events[index];
+
                pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
-                       intel_arch_events[event].eventsel;
+                                intel_arch_events[event].eventsel;
        }
 }
 
@@ -508,10 +539,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
        if (pmu->version == 1) {
                pmu->nr_arch_fixed_counters = 0;
        } else {
-               pmu->nr_arch_fixed_counters =
-                       min3(ARRAY_SIZE(fixed_pmc_events),
-                            (size_t) edx.split.num_counters_fixed,
-                            (size_t)kvm_pmu_cap.num_counters_fixed);
+               pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
+                                                   kvm_pmu_cap.num_counters_fixed);
                edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
                                                  kvm_pmu_cap.bit_width_fixed);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
index b483a8b..72e3943 100644 (file)
 #include <asm/idtentry.h>
 #include <asm/io.h>
 #include <asm/irq_remapping.h>
-#include <asm/kexec.h>
+#include <asm/reboot.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/mshyperv.h>
 #include <asm/mwait.h>
 #include <asm/spec-ctrl.h>
-#include <asm/virtext.h>
 #include <asm/vmx.h>
 
 #include "capabilities.h"
@@ -237,9 +236,6 @@ static const struct {
 #define L1D_CACHE_ORDER 4
 static void *vmx_l1d_flush_pages;
 
-/* Control for disabling CPU Fill buffer clear */
-static bool __read_mostly vmx_fb_clear_ctrl_available;
-
 static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 {
        struct page *page;
@@ -255,14 +251,9 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
                return 0;
        }
 
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-               u64 msr;
-
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-               if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
-                       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
-                       return 0;
-               }
+       if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+               l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+               return 0;
        }
 
        /* If set to auto use the default l1tf mitigation method */
@@ -366,22 +357,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
 {
        if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
-               return sprintf(s, "???\n");
+               return sysfs_emit(s, "???\n");
 
-       return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
-}
-
-static void vmx_setup_fb_clear_ctrl(void)
-{
-       u64 msr;
-
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
-           !boot_cpu_has_bug(X86_BUG_MDS) &&
-           !boot_cpu_has_bug(X86_BUG_TAA)) {
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-               if (msr & ARCH_CAP_FB_CLEAR_CTRL)
-                       vmx_fb_clear_ctrl_available = true;
-       }
+       return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 }
 
 static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
@@ -409,7 +387,9 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
 
 static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 {
-       vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+       vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+                               !boot_cpu_has_bug(X86_BUG_MDS) &&
+                               !boot_cpu_has_bug(X86_BUG_TAA);
 
        /*
         * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@@ -754,17 +734,51 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
        return ret;
 }
 
-#ifdef CONFIG_KEXEC_CORE
-static void crash_vmclear_local_loaded_vmcss(void)
+/*
+ * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
+ *
+ * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
+ * atomically track post-VMXON state, e.g. this may be called in NMI context.
+ * Eat all faults, as all other faults on VMXOFF are mode related, i.e.
+ * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
+ * magically in RM, VM86, compat mode, or at CPL>0.
+ */
+static int kvm_cpu_vmxoff(void)
+{
+       asm_volatile_goto("1: vmxoff\n\t"
+                         _ASM_EXTABLE(1b, %l[fault])
+                         ::: "cc", "memory" : fault);
+
+       cr4_clear_bits(X86_CR4_VMXE);
+       return 0;
+
+fault:
+       cr4_clear_bits(X86_CR4_VMXE);
+       return -EIO;
+}
+
+static void vmx_emergency_disable(void)
 {
        int cpu = raw_smp_processor_id();
        struct loaded_vmcs *v;
 
+       kvm_rebooting = true;
+
+       /*
+        * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
+        * set in task context.  If this races with an NMI disabling VMX,
+        * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults because
+        * kvm_rebooting is set.
+        */
+       if (!(__read_cr4() & X86_CR4_VMXE))
+               return;
+
        list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
                            loaded_vmcss_on_cpu_link)
                vmcs_clear(v->vmcs);
+
+       kvm_cpu_vmxoff();
 }
-#endif /* CONFIG_KEXEC_CORE */
 
 static void __loaded_vmcs_clear(void *arg)
 {
@@ -1899,25 +1913,14 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
        return kvm_caps.default_tsc_scaling_ratio;
 }
 
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
 {
-       vmcs_write64(TSC_OFFSET, offset);
+       vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 }
 
-static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
 {
-       vmcs_write64(TSC_MULTIPLIER, multiplier);
-}
-
-/*
- * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
- * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
- * all guests if the "nested" module option is off, and can also be disabled
- * for a single guest by disabling its VMX cpuid bit.
- */
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
-{
-       return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
+       vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
 }
 
 /*
@@ -2047,7 +2050,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
                break;
        case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
-               if (!nested_vmx_allowed(vcpu))
+               if (!guest_can_use(vcpu, X86_FEATURE_VMX))
                        return 1;
                if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
                                    &msr_info->data))
@@ -2355,7 +2358,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
                if (!msr_info->host_initiated)
                        return 1; /* they are read-only */
-               if (!nested_vmx_allowed(vcpu))
+               if (!guest_can_use(vcpu, X86_FEATURE_VMX))
                        return 1;
                return vmx_set_vmx_msr(vcpu, msr_index, data);
        case MSR_IA32_RTIT_CTL:
@@ -2729,11 +2732,11 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
        return 0;
 }
 
-static bool kvm_is_vmx_supported(void)
+static bool __kvm_is_vmx_supported(void)
 {
-       int cpu = raw_smp_processor_id();
+       int cpu = smp_processor_id();
 
-       if (!cpu_has_vmx()) {
+       if (!(cpuid_ecx(1) & feature_bit(VMX))) {
                pr_err("VMX not supported by CPU %d\n", cpu);
                return false;
        }
@@ -2747,13 +2750,24 @@ static bool kvm_is_vmx_supported(void)
        return true;
 }
 
+static bool kvm_is_vmx_supported(void)
+{
+       bool supported;
+
+       migrate_disable();
+       supported = __kvm_is_vmx_supported();
+       migrate_enable();
+
+       return supported;
+}
+
 static int vmx_check_processor_compat(void)
 {
        int cpu = raw_smp_processor_id();
        struct vmcs_config vmcs_conf;
        struct vmx_capability vmx_cap;
 
-       if (!kvm_is_vmx_supported())
+       if (!__kvm_is_vmx_supported())
                return -EIO;
 
        if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
@@ -2833,7 +2847,7 @@ static void vmx_hardware_disable(void)
 {
        vmclear_local_loaded_vmcss();
 
-       if (cpu_vmxoff())
+       if (kvm_cpu_vmxoff())
                kvm_spurious_fault();
 
        hv_reset_evmcs();
@@ -3071,13 +3085,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
        vmx->rmode.vm86_active = 1;
 
-       /*
-        * Very old userspace does not call KVM_SET_TSS_ADDR before entering
-        * vcpu. Warn the user that an update is overdue.
-        */
-       if (!kvm_vmx->tss_addr)
-               pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n");
-
        vmx_segment_cache_clear(vmx);
 
        vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
@@ -3350,7 +3357,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        vmx->emulation_required = vmx_emulation_required(vcpu);
 }
 
-static int vmx_get_max_tdp_level(void)
+static int vmx_get_max_ept_level(void)
 {
        if (cpu_has_vmx_ept_5levels())
                return 5;
@@ -4553,16 +4560,19 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
  * based on a single guest CPUID bit, with a dedicated feature bit.  This also
  * verifies that the control is actually supported by KVM and hardware.
  */
-#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
-({                                                                      \
-       bool __enabled;                                                  \
-                                                                        \
-       if (cpu_has_vmx_##name()) {                                      \
-               __enabled = guest_cpuid_has(&(vmx)->vcpu,                \
-                                           X86_FEATURE_##feat_name);    \
-               vmx_adjust_secondary_exec_control(vmx, exec_control,     \
-                       SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
-       }                                                                \
+#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting)    \
+({                                                                                             \
+       struct kvm_vcpu *__vcpu = &(vmx)->vcpu;                                                 \
+       bool __enabled;                                                                         \
+                                                                                               \
+       if (cpu_has_vmx_##name()) {                                                             \
+               if (kvm_is_governed_feature(X86_FEATURE_##feat_name))                           \
+                       __enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name);             \
+               else                                                                            \
+                       __enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name);           \
+               vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
+                                                 __enabled, exiting);                          \
+       }                                                                                       \
 })
 
 /* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
@@ -4622,19 +4632,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
        if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
                exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-       if (cpu_has_vmx_xsaves()) {
-               /* Exposing XSAVES only when XSAVE is exposed */
-               bool xsaves_enabled =
-                       boot_cpu_has(X86_FEATURE_XSAVE) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
-
-               vcpu->arch.xsaves_enabled = xsaves_enabled;
-
-               vmx_adjust_secondary_exec_control(vmx, &exec_control,
-                                                 SECONDARY_EXEC_XSAVES,
-                                                 xsaves_enabled, false);
-       }
+       vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES);
 
        /*
         * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
@@ -4653,6 +4651,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
                                                  SECONDARY_EXEC_ENABLE_RDTSCP,
                                                  rdpid_or_rdtscp_enabled, false);
        }
+
        vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
        vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
@@ -6796,8 +6795,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
        vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
        read_unlock(&vcpu->kvm->mmu_lock);
 
-       vmx_flush_tlb_current(vcpu);
-
+       /*
+        * No need for a manual TLB flush at this point, KVM has already done a
+        * flush if there were SPTEs pointing at the previous page.
+        */
 out:
        /*
         * Do not pin apic access page in memory, the MMU notifier
@@ -7243,13 +7244,20 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                                   flags);
 
        vcpu->arch.cr2 = native_read_cr2();
+       vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
+
+       vmx->idt_vectoring_info = 0;
 
        vmx_enable_fb_clear(vmx);
 
-       if (unlikely(vmx->fail))
+       if (unlikely(vmx->fail)) {
                vmx->exit_reason.full = 0xdead;
-       else
-               vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+               goto out;
+       }
+
+       vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+       if (likely(!vmx->exit_reason.failed_vmentry))
+               vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
        if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
            is_nmi(vmx_get_intr_info(vcpu))) {
@@ -7258,6 +7266,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
                kvm_after_interrupt(vcpu);
        }
 
+out:
        guest_state_exit_irqoff();
 }
 
@@ -7379,8 +7388,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
        loadsegment(es, __USER_DS);
 #endif
 
-       vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
-
        pt_guest_exit(vmx);
 
        kvm_load_host_xsave_state(vcpu);
@@ -7397,17 +7404,12 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                vmx->nested.nested_run_pending = 0;
        }
 
-       vmx->idt_vectoring_info = 0;
-
        if (unlikely(vmx->fail))
                return EXIT_FASTPATH_NONE;
 
        if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
                kvm_machine_check();
 
-       if (likely(!vmx->exit_reason.failed_vmentry))
-               vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
        trace_kvm_exit(vcpu, KVM_ISA_VMX);
 
        if (unlikely(vmx->exit_reason.failed_vmentry))
@@ -7751,8 +7753,16 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
-       vcpu->arch.xsaves_enabled = false;
+       /*
+        * XSAVES is effectively enabled if and only if XSAVE is also exposed
+        * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
+        * set if and only if XSAVE is supported.
+        */
+       if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+           guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+               kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
+
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
 
        vmx_setup_uret_msrs(vmx);
 
@@ -7760,7 +7770,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                vmcs_set_secondary_exec_control(vmx,
                                                vmx_secondary_exec_control(vmx));
 
-       if (nested_vmx_allowed(vcpu))
+       if (guest_can_use(vcpu, X86_FEATURE_VMX))
                vmx->msr_ia32_feature_control_valid_bits |=
                        FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
                        FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
@@ -7769,7 +7779,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
                        ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
                          FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
 
-       if (nested_vmx_allowed(vcpu))
+       if (guest_can_use(vcpu, X86_FEATURE_VMX))
                nested_vmx_cr_fixed1_bits_update(vcpu);
 
        if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@@ -8526,7 +8536,7 @@ static __init int hardware_setup(void)
         */
        vmx_setup_me_spte_mask();
 
-       kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
+       kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(),
                          ept_caps_to_lpage_level(vmx_capability.ept));
 
        /*
@@ -8622,10 +8632,8 @@ static void __vmx_exit(void)
 {
        allow_smaller_maxphyaddr = false;
 
-#ifdef CONFIG_KEXEC_CORE
-       RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
-       synchronize_rcu();
-#endif
+       cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
+
        vmx_cleanup_l1d_flush();
 }
 
@@ -8666,18 +8674,14 @@ static int __init vmx_init(void)
        if (r)
                goto err_l1d_flush;
 
-       vmx_setup_fb_clear_ctrl();
-
        for_each_possible_cpu(cpu) {
                INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
 
                pi_init_cpu(cpu);
        }
 
-#ifdef CONFIG_KEXEC_CORE
-       rcu_assign_pointer(crash_vmclear_loaded_vmcss,
-                          crash_vmclear_local_loaded_vmcss);
-#endif
+       cpu_emergency_register_virt_callback(vmx_emergency_disable);
+
        vmx_check_vmcs12_offsets();
 
        /*
index 32384ba..c2130d2 100644 (file)
@@ -374,7 +374,6 @@ struct kvm_vmx {
        u64 *pid_table;
 };
 
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
                        struct loaded_vmcs *buddy);
 int allocate_vpid(void);
@@ -562,7 +561,7 @@ static inline u8 vmx_get_rvi(void)
         SECONDARY_EXEC_APIC_REGISTER_VIRT |                            \
         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |                         \
         SECONDARY_EXEC_SHADOW_VMCS |                                   \
-        SECONDARY_EXEC_XSAVES |                                        \
+        SECONDARY_EXEC_ENABLE_XSAVES |                                 \
         SECONDARY_EXEC_RDSEED_EXITING |                                \
         SECONDARY_EXEC_RDRAND_EXITING |                                \
         SECONDARY_EXEC_ENABLE_PML |                                    \
index c381770..6c9c81e 100644 (file)
@@ -25,6 +25,7 @@
 #include "tss.h"
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
+#include "mmu/page_track.h"
 #include "x86.h"
 #include "cpuid.h"
 #include "pmu.h"
@@ -237,6 +238,9 @@ EXPORT_SYMBOL_GPL(enable_apicv);
 u64 __read_mostly host_xss;
 EXPORT_SYMBOL_GPL(host_xss);
 
+u64 __read_mostly host_arch_capabilities;
+EXPORT_SYMBOL_GPL(host_arch_capabilities);
+
 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
        KVM_GENERIC_VM_STATS(),
        STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@ -1021,7 +1025,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
                if (vcpu->arch.xcr0 != host_xcr0)
                        xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
 
-               if (vcpu->arch.xsaves_enabled &&
+               if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
                    vcpu->arch.ia32_xss != host_xss)
                        wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
        }
@@ -1052,7 +1056,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
                if (vcpu->arch.xcr0 != host_xcr0)
                        xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
 
-               if (vcpu->arch.xsaves_enabled &&
+               if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
                    vcpu->arch.ia32_xss != host_xss)
                        wrmsrl(MSR_IA32_XSS, host_xss);
        }
@@ -1620,12 +1624,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
 
 static u64 kvm_get_arch_capabilities(void)
 {
-       u64 data = 0;
-
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
-               data &= KVM_SUPPORTED_ARCH_CAP;
-       }
+       u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
 
        /*
         * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@@ -2631,7 +2630,7 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
        else
                vcpu->arch.tsc_offset = l1_offset;
 
-       static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
+       static_call(kvm_x86_write_tsc_offset)(vcpu);
 }
 
 static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
@@ -2647,8 +2646,7 @@ static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multipli
                vcpu->arch.tsc_scaling_ratio = l1_multiplier;
 
        if (kvm_caps.has_tsc_control)
-               static_call(kvm_x86_write_tsc_multiplier)(
-                       vcpu, vcpu->arch.tsc_scaling_ratio);
+               static_call(kvm_x86_write_tsc_multiplier)(vcpu);
 }
 
 static inline bool kvm_check_tsc_unstable(void)
@@ -4665,7 +4663,6 @@ static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
                return 0;
        default:
                return -ENXIO;
-               break;
        }
 }
 
@@ -6532,7 +6529,7 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
                              struct kvm_msr_filter_range *user_range)
 {
-       unsigned long *bitmap = NULL;
+       unsigned long *bitmap;
        size_t bitmap_size;
 
        if (!user_range->nmsrs)
@@ -8245,11 +8242,6 @@ static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
        return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
 }
 
-static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
-{
-       return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
-}
-
 static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
 {
        return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
@@ -8351,7 +8343,6 @@ static const struct x86_emulate_ops emulate_ops = {
        .fix_hypercall       = emulator_fix_hypercall,
        .intercept           = emulator_intercept,
        .get_cpuid           = emulator_get_cpuid,
-       .guest_has_long_mode = emulator_guest_has_long_mode,
        .guest_has_movbe     = emulator_guest_has_movbe,
        .guest_has_fxsr      = emulator_guest_has_fxsr,
        .guest_has_rdpid     = emulator_guest_has_rdpid,
@@ -9172,7 +9163,7 @@ static int kvmclock_cpu_down_prep(unsigned int cpu)
 static void tsc_khz_changed(void *data)
 {
        struct cpufreq_freqs *freq = data;
-       unsigned long khz = 0;
+       unsigned long khz;
 
        WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
 
@@ -9512,6 +9503,9 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 
        kvm_init_pmu_capability(ops->pmu_ops);
 
+       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
+
        r = ops->hardware_setup();
        if (r != 0)
                goto out_mmu_exit;
@@ -11111,12 +11105,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        goto out;
                }
+
                /*
-                * It should be impossible for the hypervisor timer to be in
-                * use before KVM has ever run the vCPU.
+                * Don't bother switching APIC timer emulation from the
+                * hypervisor timer to the software timer, the only way for the
+                * APIC timer to be active is if userspace stuffed vCPU state,
+                * i.e. put the vCPU into a nonsensical state.  Only an INIT
+                * will transition the vCPU out of UNINITIALIZED (without more
+                * state stuffing from userspace), which will reset the local
+                * APIC and thus cancel the timer or drop the IRQ (if the timer
+                * already expired).
                 */
-               WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
-
                kvm_vcpu_srcu_read_unlock(vcpu);
                kvm_vcpu_block(vcpu);
                kvm_vcpu_srcu_read_lock(vcpu);
@@ -11798,15 +11797,22 @@ static int sync_regs(struct kvm_vcpu *vcpu)
                __set_regs(vcpu, &vcpu->run->s.regs.regs);
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
        }
+
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
-               if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+               struct kvm_sregs sregs = vcpu->run->s.regs.sregs;
+
+               if (__set_sregs(vcpu, &sregs))
                        return -EINVAL;
+
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
        }
+
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
-               if (kvm_vcpu_ioctl_x86_set_vcpu_events(
-                               vcpu, &vcpu->run->s.regs.events))
+               struct kvm_vcpu_events events = vcpu->run->s.regs.events;
+
+               if (kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events))
                        return -EINVAL;
+
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
        }
 
@@ -12627,6 +12633,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
+       /*
+        * KVM doesn't support moving memslots when there are external page
+        * trackers attached to the VM, i.e. if KVMGT is in use.
+        */
+       if (change == KVM_MR_MOVE && kvm_page_track_has_external_user(kvm))
+               return -EINVAL;
+
        if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
                if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
                        return -EINVAL;
@@ -12772,7 +12785,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
                 * See is_writable_pte() for more details (the case involving
                 * access-tracked SPTEs is particularly relevant).
                 */
-               kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+               kvm_flush_remote_tlbs_memslot(kvm, new);
        }
 }
 
@@ -12781,6 +12794,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
+       if (change == KVM_MR_DELETE)
+               kvm_page_track_delete_slot(kvm, old);
+
        if (!kvm->arch.n_requested_mmu_pages &&
            (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
                unsigned long nr_mmu_pages;
@@ -12797,17 +12813,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                kvm_arch_free_memslot(kvm, old);
 }
 
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-       kvm_mmu_zap_all(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-                                  struct kvm_memory_slot *slot)
-{
-       kvm_page_track_flush_slot(kvm, slot);
-}
-
 static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
        return (is_guest_mode(vcpu) &&
index 82e3daf..1e7be1f 100644 (file)
@@ -323,6 +323,7 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
 
 extern u64 host_xcr0;
 extern u64 host_xss;
+extern u64 host_arch_capabilities;
 
 extern struct kvm_caps kvm_caps;
 
index a5488cc..7d79207 100644 (file)
@@ -71,6 +71,9 @@ config ARCH_HAS_ILOG2_U32
 config ARCH_HAS_ILOG2_U64
        def_bool n
 
+config ARCH_MTD_XIP
+       def_bool y
+
 config NO_IOPORT_MAP
        def_bool n
 
index 0e1bb6f..3f5ffae 100644 (file)
 #define XTENSA_STACK_ALIGNMENT 16
 #endif
 
+#ifndef XCHAL_HW_MIN_VERSION
+#if defined(XCHAL_HW_MIN_VERSION_MAJOR) && defined(XCHAL_HW_MIN_VERSION_MINOR)
+#define XCHAL_HW_MIN_VERSION (XCHAL_HW_MIN_VERSION_MAJOR * 100 + \
+                             XCHAL_HW_MIN_VERSION_MINOR)
+#else
+#define XCHAL_HW_MIN_VERSION 0
+#endif
+#endif
+
 #endif
diff --git a/arch/xtensa/include/asm/mtd-xip.h b/arch/xtensa/include/asm/mtd-xip.h
new file mode 100644 (file)
index 0000000..5143251
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_MTD_XIP_H
+#define _ASM_MTD_XIP_H
+
+#include <asm/processor.h>
+
+#define xip_irqpending()       (xtensa_get_sr(interrupt) & xtensa_get_sr(intenable))
+#define xip_currtime()         (xtensa_get_sr(ccount))
+#define xip_elapsed_since(x)   ((xtensa_get_sr(ccount) - (x)) / 1000) /* should work up to 1GHz */
+#define xip_cpu_idle()         do { asm volatile ("waiti 0"); } while (0)
+
+#endif /* _ASM_MTD_XIP_H */
+
index 3bc6b9a..e5da6d7 100644 (file)
@@ -34,6 +34,10 @@ extern char _SecondaryResetVector_text_start[];
 extern char _SecondaryResetVector_text_end[];
 #endif
 #ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_VECTORS_ADDR
+extern char _xip_text_start[];
+extern char _xip_text_end[];
+#endif
 extern char _xip_start[];
 extern char _xip_end[];
 #endif
index a0d05c8..1836180 100644 (file)
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 
+#include <asm/core.h>
 #include <asm/processor.h>
 #include <asm/stacktrace.h>
 
+#define XTENSA_HWVERSION_RG_2015_0     260000
+
+#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
+#define XTENSA_PMU_ERI_BASE            0x00101000
+#else
+#define XTENSA_PMU_ERI_BASE            0x00001000
+#endif
+
 /* Global control/status for all perf counters */
-#define XTENSA_PMU_PMG                 0x1000
+#define XTENSA_PMU_PMG                 XTENSA_PMU_ERI_BASE
 /* Perf counter values */
-#define XTENSA_PMU_PM(i)               (0x1080 + (i) * 4)
+#define XTENSA_PMU_PM(i)               (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
 /* Perf counter control registers */
-#define XTENSA_PMU_PMCTRL(i)           (0x1100 + (i) * 4)
+#define XTENSA_PMU_PMCTRL(i)           (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
 /* Perf counter status registers */
-#define XTENSA_PMU_PMSTAT(i)           (0x1180 + (i) * 4)
+#define XTENSA_PMU_PMSTAT(i)           (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)
 
 #define XTENSA_PMU_PMG_PMEN            0x1
 
index aba3ff4..52d6e48 100644 (file)
@@ -311,6 +311,9 @@ void __init setup_arch(char **cmdline_p)
 
        mem_reserve(__pa(_stext), __pa(_end));
 #ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_VECTORS_ADDR
+       mem_reserve(__pa(_xip_text_start), __pa(_xip_text_end));
+#endif
        mem_reserve(__pa(_xip_start), __pa(_xip_end));
 #endif
 
index c14fd96..f47e9bb 100644 (file)
@@ -118,6 +118,7 @@ SECTIONS
     SECTION_VECTOR2 (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR)
 
     *(.exception.text)
+    *(.xiptext)
 #endif
 
     IRQENTRY_TEXT
@@ -201,6 +202,9 @@ SECTIONS
                   .DebugInterruptVector.text);
     RELOCATE_ENTRY(_exception_text,
                   .exception.text);
+#ifdef CONFIG_XIP_KERNEL
+    RELOCATE_ENTRY(_xip_text, .xiptext);
+#endif
 #endif
 #ifdef CONFIG_XIP_KERNEL
     RELOCATE_ENTRY(_xip_data, .data);
@@ -319,7 +323,12 @@ SECTIONS
                  LAST)
 #undef LAST
 #define LAST .exception.text
-
+  SECTION_VECTOR4 (_xip_text,
+                 .xiptext,
+                 ,
+                 LAST)
+#undef LAST
+#define LAST .xiptext
 #endif
   . = (LOADADDR(LAST) + SIZEOF(LAST) + 3) & ~ 3;
 
index 44d74a3..8584bab 100644 (file)
@@ -315,12 +315,11 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
                                        n = bytes;
 
                                if (!bio_add_hw_page(rq->q, bio, page, n, offs,
-                                                    max_sectors, &same_page)) {
-                                       if (same_page)
-                                               bio_release_page(bio, page);
+                                                    max_sectors, &same_page))
                                        break;
-                               }
 
+                               if (same_page)
+                                       bio_release_page(bio, page);
                                bytes -= n;
                                offs = 0;
                        }
index 7397ff1..38a881c 100644 (file)
@@ -697,11 +697,41 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
        return true;
 }
 
+static unsigned int calculate_io_allowed(u32 iops_limit,
+                                        unsigned long jiffy_elapsed)
+{
+       unsigned int io_allowed;
+       u64 tmp;
+
+       /*
+        * jiffy_elapsed should not be a big value as minimum iops can be
+        * 1 then at max jiffy elapsed should be equivalent of 1 second as we
+        * will allow dispatch after 1 second and after that slice should
+        * have been trimmed.
+        */
+
+       tmp = (u64)iops_limit * jiffy_elapsed;
+       do_div(tmp, HZ);
+
+       if (tmp > UINT_MAX)
+               io_allowed = UINT_MAX;
+       else
+               io_allowed = tmp;
+
+       return io_allowed;
+}
+
+static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
+{
+       return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
+}
+
 /* Trim the used slices and adjust slice start accordingly */
 static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 {
-       unsigned long nr_slices, time_elapsed, io_trim;
-       u64 bytes_trim, tmp;
+       unsigned long time_elapsed;
+       long long bytes_trim;
+       int io_trim;
 
        BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw]));
 
@@ -723,67 +753,38 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 
        throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
 
-       time_elapsed = jiffies - tg->slice_start[rw];
-
-       nr_slices = time_elapsed / tg->td->throtl_slice;
-
-       if (!nr_slices)
+       time_elapsed = rounddown(jiffies - tg->slice_start[rw],
+                                tg->td->throtl_slice);
+       if (!time_elapsed)
                return;
-       tmp = tg_bps_limit(tg, rw) * tg->td->throtl_slice * nr_slices;
-       do_div(tmp, HZ);
-       bytes_trim = tmp;
 
-       io_trim = (tg_iops_limit(tg, rw) * tg->td->throtl_slice * nr_slices) /
-               HZ;
-
-       if (!bytes_trim && !io_trim)
+       bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
+                                            time_elapsed) +
+                    tg->carryover_bytes[rw];
+       io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed) +
+                 tg->carryover_ios[rw];
+       if (bytes_trim <= 0 && io_trim <= 0)
                return;
 
-       if (tg->bytes_disp[rw] >= bytes_trim)
+       tg->carryover_bytes[rw] = 0;
+       if ((long long)tg->bytes_disp[rw] >= bytes_trim)
                tg->bytes_disp[rw] -= bytes_trim;
        else
                tg->bytes_disp[rw] = 0;
 
-       if (tg->io_disp[rw] >= io_trim)
+       tg->carryover_ios[rw] = 0;
+       if ((int)tg->io_disp[rw] >= io_trim)
                tg->io_disp[rw] -= io_trim;
        else
                tg->io_disp[rw] = 0;
 
-       tg->slice_start[rw] += nr_slices * tg->td->throtl_slice;
+       tg->slice_start[rw] += time_elapsed;
 
        throtl_log(&tg->service_queue,
-                  "[%c] trim slice nr=%lu bytes=%llu io=%lu start=%lu end=%lu jiffies=%lu",
-                  rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
-                  tg->slice_start[rw], tg->slice_end[rw], jiffies);
-}
-
-static unsigned int calculate_io_allowed(u32 iops_limit,
-                                        unsigned long jiffy_elapsed)
-{
-       unsigned int io_allowed;
-       u64 tmp;
-
-       /*
-        * jiffy_elapsed should not be a big value as minimum iops can be
-        * 1 then at max jiffy elapsed should be equivalent of 1 second as we
-        * will allow dispatch after 1 second and after that slice should
-        * have been trimmed.
-        */
-
-       tmp = (u64)iops_limit * jiffy_elapsed;
-       do_div(tmp, HZ);
-
-       if (tmp > UINT_MAX)
-               io_allowed = UINT_MAX;
-       else
-               io_allowed = tmp;
-
-       return io_allowed;
-}
-
-static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
-{
-       return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
+                  "[%c] trim slice nr=%lu bytes=%lld io=%d start=%lu end=%lu jiffies=%lu",
+                  rw == READ ? 'R' : 'W', time_elapsed / tg->td->throtl_slice,
+                  bytes_trim, io_trim, tg->slice_start[rw], tg->slice_end[rw],
+                  jiffies);
 }
 
 static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
@@ -816,7 +817,7 @@ static void tg_update_carryover(struct throtl_grp *tg)
                __tg_update_carryover(tg, WRITE);
 
        /* see comments in struct throtl_grp for meaning of these fields. */
-       throtl_log(&tg->service_queue, "%s: %llu %llu %u %u\n", __func__,
+       throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__,
                   tg->carryover_bytes[READ], tg->carryover_bytes[WRITE],
                   tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
 }
@@ -825,7 +826,7 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
                                 u32 iops_limit)
 {
        bool rw = bio_data_dir(bio);
-       unsigned int io_allowed;
+       int io_allowed;
        unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 
        if (iops_limit == UINT_MAX) {
@@ -838,9 +839,8 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
        jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
        io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
                     tg->carryover_ios[rw];
-       if (tg->io_disp[rw] + 1 <= io_allowed) {
+       if (io_allowed > 0 && tg->io_disp[rw] + 1 <= io_allowed)
                return 0;
-       }
 
        /* Calc approx time to dispatch */
        jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;
@@ -851,7 +851,8 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
                                u64 bps_limit)
 {
        bool rw = bio_data_dir(bio);
-       u64 bytes_allowed, extra_bytes;
+       long long bytes_allowed;
+       u64 extra_bytes;
        unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
        unsigned int bio_size = throtl_bio_data_size(bio);
 
@@ -869,9 +870,8 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
        jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
        bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
                        tg->carryover_bytes[rw];
-       if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
+       if (bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
                return 0;
-       }
 
        /* Calc approx time to dispatch */
        extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
index d1ccbfe..bffbc9c 100644 (file)
@@ -127,8 +127,8 @@ struct throtl_grp {
         * bytes/ios are waited already in previous configuration, and they will
         * be used to calculate wait time under new configuration.
         */
-       uint64_t carryover_bytes[2];
-       unsigned int carryover_ios[2];
+       long long carryover_bytes[2];
+       int carryover_ios[2];
 
        unsigned long last_check_time;
 
index a24a624..acff3d5 100644 (file)
@@ -671,10 +671,6 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
                iov_iter_truncate(from, size);
        }
 
-       ret = file_remove_privs(file);
-       if (ret)
-               return ret;
-
        ret = file_update_time(file);
        if (ret)
                return ret;
index 648670d..d5f5cd6 100644 (file)
@@ -20,6 +20,8 @@ static int blkpg_do_ioctl(struct block_device *bdev,
        struct blkpg_partition p;
        long long start, length;
 
+       if (disk->flags & GENHD_FL_NO_PART)
+               return -EINVAL;
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
index 496ca02..efb66e2 100644 (file)
@@ -15,6 +15,8 @@ source "drivers/base/Kconfig"
 
 source "drivers/bus/Kconfig"
 
+source "drivers/cache/Kconfig"
+
 source "drivers/connector/Kconfig"
 
 source "drivers/firmware/Kconfig"
index 0957f63..cb0afca 100644 (file)
@@ -11,6 +11,7 @@ ifdef building_out_of_srctree
 MAKEFLAGS += --include-dir=$(srctree)
 endif
 
+obj-y                          += cache/
 obj-y                          += irqchip/
 obj-y                          += bus/
 
index 831bfd2..bdddef2 100644 (file)
@@ -118,8 +118,7 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size
        struct vpu_jsm_msg resp;
        int ret;
 
-       if (!strncpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN - 1))
-               return -ENOMEM;
+       strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN);
 
        ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
                                    VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
index 419590f..f14e682 100644 (file)
@@ -492,26 +492,22 @@ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp)
 }
 
 static int thermal_get_trend(struct thermal_zone_device *thermal,
-                            int trip_index, enum thermal_trend *trend)
+                            struct thermal_trip *trip,
+                            enum thermal_trend *trend)
 {
        struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
        struct acpi_thermal_trip *acpi_trip;
-       int t, i;
+       int t;
 
-       if (!tz || trip_index < 0)
+       if (!tz || !trip)
                return -EINVAL;
 
-       if (tz->trips.critical.valid)
-               trip_index--;
-
-       if (tz->trips.hot.valid)
-               trip_index--;
-
-       if (trip_index < 0)
+       acpi_trip = trip->priv;
+       if (!acpi_trip || !acpi_trip->valid)
                return -EINVAL;
 
-       acpi_trip = &tz->trips.passive.trip;
-       if (acpi_trip->valid && !trip_index--) {
+       switch (trip->type) {
+       case THERMAL_TRIP_PASSIVE:
                t = tz->trips.passive.tc1 * (tz->temperature -
                                                tz->last_temperature) +
                        tz->trips.passive.tc2 * (tz->temperature -
@@ -524,19 +520,18 @@ static int thermal_get_trend(struct thermal_zone_device *thermal,
                        *trend = THERMAL_TREND_STABLE;
 
                return 0;
-       }
-
-       t = acpi_thermal_temp(tz, tz->temperature);
 
-       for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
-               acpi_trip = &tz->trips.active[i].trip;
-               if (acpi_trip->valid && !trip_index--) {
-                       if (t > acpi_thermal_temp(tz, acpi_trip->temperature)) {
-                               *trend = THERMAL_TREND_RAISING;
-                               return 0;
-                       }
+       case THERMAL_TRIP_ACTIVE:
+               t = acpi_thermal_temp(tz, tz->temperature);
+               if (t <= trip->temperature)
                        break;
-               }
+
+               *trend = THERMAL_TREND_RAISING;
+
+               return 0;
+
+       default:
+               break;
        }
 
        return -EINVAL;
index addba10..abb5911 100644 (file)
@@ -421,6 +421,8 @@ static const struct pci_device_id ahci_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */
        { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */
        { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */
+       /* Elkhart Lake IDs 0x4b60 & 0x4b62 https://sata-io.org/product/8803 not tested yet */
+       { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_low_power }, /* Elkhart Lake AHCI */
 
        /* JMicron 360/1/3/5/6, match class to avoid IDE function */
        { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
@@ -807,7 +809,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class,
 static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
                              unsigned long deadline)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        struct ata_port *ap = link->ap;
        struct ahci_port_priv *pp = ap->private_data;
        struct ahci_host_priv *hpriv = ap->host->private_data;
index c2b6be0..64f7f7d 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/libata.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include "ahci.h"
index 9604a2f..ed263de 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/log2.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/regmap.h>
index 5083fb6..adc851c 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/libata.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/regmap.h>
index 7645015..f318735 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/mbus.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include "ahci.h"
 
index e89807f..9accf89 100644 (file)
@@ -31,13 +31,11 @@ static int ahci_octeon_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct device_node *node = dev->of_node;
-       struct resource *res;
        void __iomem *base;
        u64 cfg;
        int ret;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       base = devm_ioremap_resource(&pdev->dev, res);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base))
                return PTR_ERR(base);
 
index 3d01b11..b1a4e57 100644 (file)
@@ -12,9 +12,7 @@
 #include <linux/pm.h>
 #include <linux/ahci_platform.h>
 #include <linux/device.h>
-#include <linux/of_address.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/libata.h>
 #include "ahci.h"
@@ -90,7 +88,7 @@ MODULE_DEVICE_TABLE(acpi, ahci_qoriq_acpi_match);
 static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class,
                          unsigned long deadline)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        void __iomem *port_mmio = ahci_port_base(link->ap);
        u32 px_cmd, px_is, px_val;
        struct ata_port *ap = link->ap;
index 2c32d58..59f97aa 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/device.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/libata.h>
 #include <linux/ahci_platform.h>
@@ -132,8 +131,7 @@ static const struct ata_port_info *ahci_seattle_get_port_info(
        if (!plat_data)
                return &ahci_port_info;
 
-       plat_data->sgpio_ctrl = devm_ioremap_resource(dev,
-                             platform_get_resource(pdev, IORESOURCE_MEM, 1));
+       plat_data->sgpio_ctrl = devm_platform_ioremap_resource(pdev, 1);
        if (IS_ERR(plat_data->sgpio_ctrl))
                return &ahci_port_info;
 
index 04531fa..58b2683 100644 (file)
@@ -13,8 +13,8 @@
 #include <linux/clk.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include "ahci.h"
index 21c2079..8703c2a 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
@@ -530,8 +530,7 @@ static int tegra_ahci_probe(struct platform_device *pdev)
        tegra->pdev = pdev;
        tegra->soc = of_device_get_match_data(&pdev->dev);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       tegra->sata_regs = devm_ioremap_resource(&pdev->dev, res);
+       tegra->sata_regs = devm_platform_ioremap_resource(pdev, 1);
        if (IS_ERR(tegra->sata_regs))
                return PTR_ERR(tegra->sata_regs);
 
index eb773f2..ccef5e6 100644 (file)
@@ -110,9 +110,8 @@ static int xgene_ahci_init_memram(struct xgene_ahci_context *ctx)
  * @timeout : timeout for achieving the value.
  */
 static int xgene_ahci_poll_reg_val(struct ata_port *ap,
-                                  void __iomem *reg, unsigned
-                                  int val, unsigned long interval,
-                                  unsigned long timeout)
+                                  void __iomem *reg, unsigned int val,
+                                  unsigned int interval, unsigned int timeout)
 {
        unsigned long deadline;
        unsigned int tmp;
@@ -350,7 +349,7 @@ static void xgene_ahci_set_phy_cfg(struct xgene_ahci_context *ctx, int channel)
 static int xgene_ahci_do_hardreset(struct ata_link *link,
                                   unsigned long deadline, bool *online)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        struct ata_port *ap = link->ap;
        struct ahci_host_priv *hpriv = ap->host->private_data;
        struct xgene_ahci_context *ctx = hpriv->plat_data;
@@ -755,20 +754,17 @@ static int xgene_ahci_probe(struct platform_device *pdev)
        ctx->dev = dev;
 
        /* Retrieve the IP core resource */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       ctx->csr_core = devm_ioremap_resource(dev, res);
+       ctx->csr_core = devm_platform_ioremap_resource(pdev, 1);
        if (IS_ERR(ctx->csr_core))
                return PTR_ERR(ctx->csr_core);
 
        /* Retrieve the IP diagnostic resource */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-       ctx->csr_diag = devm_ioremap_resource(dev, res);
+       ctx->csr_diag = devm_platform_ioremap_resource(pdev, 2);
        if (IS_ERR(ctx->csr_diag))
                return PTR_ERR(ctx->csr_diag);
 
        /* Retrieve the IP AXI resource */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
-       ctx->csr_axi = devm_ioremap_resource(dev, res);
+       ctx->csr_axi = devm_platform_ioremap_resource(pdev, 3);
        if (IS_ERR(ctx->csr_axi))
                return PTR_ERR(ctx->csr_axi);
 
index 06aec35..e2baced 100644 (file)
@@ -1403,7 +1403,7 @@ EXPORT_SYMBOL_GPL(ahci_kick_engine);
 
 static int ahci_exec_polled_cmd(struct ata_port *ap, int pmp,
                                struct ata_taskfile *tf, int is_cmd, u16 flags,
-                               unsigned long timeout_msec)
+                               unsigned int timeout_msec)
 {
        const u32 cmd_fis_len = 5; /* five dwords */
        struct ahci_port_priv *pp = ap->private_data;
@@ -1448,7 +1448,8 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class,
        struct ahci_host_priv *hpriv = ap->host->private_data;
        struct ahci_port_priv *pp = ap->private_data;
        const char *reason = NULL;
-       unsigned long now, msecs;
+       unsigned long now;
+       unsigned int msecs;
        struct ata_taskfile tf;
        bool fbs_disabled = false;
        int rc;
@@ -1587,7 +1588,7 @@ static int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class,
 int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
                      unsigned long deadline, bool *online)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        struct ata_port *ap = link->ap;
        struct ahci_port_priv *pp = ap->private_data;
        struct ahci_host_priv *hpriv = ap->host->private_data;
index 9a8d43f..581704e 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/ahci_platform.h>
 #include <linux/phy/phy.h>
 #include <linux/pm_runtime.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/reset.h>
 #include "ahci.h"
index 04db0f2..7431431 100644 (file)
@@ -1586,13 +1586,11 @@ static unsigned ata_exec_internal_sg(struct ata_device *dev,
                }
        }
 
-       if (ap->ops->error_handler)
-               ata_eh_release(ap);
+       ata_eh_release(ap);
 
        rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout));
 
-       if (ap->ops->error_handler)
-               ata_eh_acquire(ap);
+       ata_eh_acquire(ap);
 
        ata_sff_flush_pio_task(ap);
 
@@ -1607,10 +1605,7 @@ static unsigned ata_exec_internal_sg(struct ata_device *dev,
                if (qc->flags & ATA_QCFLAG_ACTIVE) {
                        qc->err_mask |= AC_ERR_TIMEOUT;
 
-                       if (ap->ops->error_handler)
-                               ata_port_freeze(ap);
-                       else
-                               ata_qc_complete(qc);
+                       ata_port_freeze(ap);
 
                        ata_dev_warn(dev, "qc timeout after %u msecs (cmd 0x%x)\n",
                                     timeout, command);
@@ -3063,144 +3058,6 @@ int ata_cable_sata(struct ata_port *ap)
 EXPORT_SYMBOL_GPL(ata_cable_sata);
 
 /**
- *     ata_bus_probe - Reset and probe ATA bus
- *     @ap: Bus to probe
- *
- *     Master ATA bus probing function.  Initiates a hardware-dependent
- *     bus reset, then attempts to identify any devices found on
- *     the bus.
- *
- *     LOCKING:
- *     PCI/etc. bus probe sem.
- *
- *     RETURNS:
- *     Zero on success, negative errno otherwise.
- */
-
-int ata_bus_probe(struct ata_port *ap)
-{
-       unsigned int classes[ATA_MAX_DEVICES];
-       int tries[ATA_MAX_DEVICES];
-       int rc;
-       struct ata_device *dev;
-
-       ata_for_each_dev(dev, &ap->link, ALL)
-               tries[dev->devno] = ATA_PROBE_MAX_TRIES;
-
- retry:
-       ata_for_each_dev(dev, &ap->link, ALL) {
-               /* If we issue an SRST then an ATA drive (not ATAPI)
-                * may change configuration and be in PIO0 timing. If
-                * we do a hard reset (or are coming from power on)
-                * this is true for ATA or ATAPI. Until we've set a
-                * suitable controller mode we should not touch the
-                * bus as we may be talking too fast.
-                */
-               dev->pio_mode = XFER_PIO_0;
-               dev->dma_mode = 0xff;
-
-               /* If the controller has a pio mode setup function
-                * then use it to set the chipset to rights. Don't
-                * touch the DMA setup as that will be dealt with when
-                * configuring devices.
-                */
-               if (ap->ops->set_piomode)
-                       ap->ops->set_piomode(ap, dev);
-       }
-
-       /* reset and determine device classes */
-       ap->ops->phy_reset(ap);
-
-       ata_for_each_dev(dev, &ap->link, ALL) {
-               if (dev->class != ATA_DEV_UNKNOWN)
-                       classes[dev->devno] = dev->class;
-               else
-                       classes[dev->devno] = ATA_DEV_NONE;
-
-               dev->class = ATA_DEV_UNKNOWN;
-       }
-
-       /* read IDENTIFY page and configure devices. We have to do the identify
-          specific sequence bass-ackwards so that PDIAG- is released by
-          the slave device */
-
-       ata_for_each_dev(dev, &ap->link, ALL_REVERSE) {
-               if (tries[dev->devno])
-                       dev->class = classes[dev->devno];
-
-               if (!ata_dev_enabled(dev))
-                       continue;
-
-               rc = ata_dev_read_id(dev, &dev->class, ATA_READID_POSTRESET,
-                                    dev->id);
-               if (rc)
-                       goto fail;
-       }
-
-       /* Now ask for the cable type as PDIAG- should have been released */
-       if (ap->ops->cable_detect)
-               ap->cbl = ap->ops->cable_detect(ap);
-
-       /* We may have SATA bridge glue hiding here irrespective of
-        * the reported cable types and sensed types.  When SATA
-        * drives indicate we have a bridge, we don't know which end
-        * of the link the bridge is which is a problem.
-        */
-       ata_for_each_dev(dev, &ap->link, ENABLED)
-               if (ata_id_is_sata(dev->id))
-                       ap->cbl = ATA_CBL_SATA;
-
-       /* After the identify sequence we can now set up the devices. We do
-          this in the normal order so that the user doesn't get confused */
-
-       ata_for_each_dev(dev, &ap->link, ENABLED) {
-               ap->link.eh_context.i.flags |= ATA_EHI_PRINTINFO;
-               rc = ata_dev_configure(dev);
-               ap->link.eh_context.i.flags &= ~ATA_EHI_PRINTINFO;
-               if (rc)
-                       goto fail;
-       }
-
-       /* configure transfer mode */
-       rc = ata_set_mode(&ap->link, &dev);
-       if (rc)
-               goto fail;
-
-       ata_for_each_dev(dev, &ap->link, ENABLED)
-               return 0;
-
-       return -ENODEV;
-
- fail:
-       tries[dev->devno]--;
-
-       switch (rc) {
-       case -EINVAL:
-               /* eeek, something went very wrong, give up */
-               tries[dev->devno] = 0;
-               break;
-
-       case -ENODEV:
-               /* give it just one more chance */
-               tries[dev->devno] = min(tries[dev->devno], 1);
-               fallthrough;
-       case -EIO:
-               if (tries[dev->devno] == 1) {
-                       /* This is the last chance, better to slow
-                        * down than lose it.
-                        */
-                       sata_down_spd_limit(&ap->link, 0);
-                       ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
-               }
-       }
-
-       if (!tries[dev->devno])
-               ata_dev_disable(dev);
-
-       goto retry;
-}
-
-/**
  *     sata_print_link_status - Print SATA link status
  *     @link: SATA link to printk link status about
  *
@@ -3782,7 +3639,7 @@ int ata_std_prereset(struct ata_link *link, unsigned long deadline)
 {
        struct ata_port *ap = link->ap;
        struct ata_eh_context *ehc = &link->eh_context;
-       const unsigned long *timing = sata_ehc_deb_timing(ehc);
+       const unsigned int *timing = sata_ehc_deb_timing(ehc);
        int rc;
 
        /* if we're about to do hardreset, nothing more to do */
@@ -3824,7 +3681,7 @@ EXPORT_SYMBOL_GPL(ata_std_prereset);
 int sata_std_hardreset(struct ata_link *link, unsigned int *class,
                       unsigned long deadline)
 {
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        bool online;
        int rc;
 
@@ -4213,10 +4070,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
                                                ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Micron_M500_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM },
-       { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
-                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Micron_M5[15]0_*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM },
+       { "Micron_1100_*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*M550*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM },
        { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
@@ -4874,126 +4733,103 @@ static void ata_verify_xfer(struct ata_queued_cmd *qc)
 void ata_qc_complete(struct ata_queued_cmd *qc)
 {
        struct ata_port *ap = qc->ap;
+       struct ata_device *dev = qc->dev;
+       struct ata_eh_info *ehi = &dev->link->eh_info;
 
        /* Trigger the LED (if available) */
        ledtrig_disk_activity(!!(qc->tf.flags & ATA_TFLAG_WRITE));
 
-       /* XXX: New EH and old EH use different mechanisms to
-        * synchronize EH with regular execution path.
-        *
-        * In new EH, a qc owned by EH is marked with ATA_QCFLAG_EH.
-        * Normal execution path is responsible for not accessing a
-        * qc owned by EH.  libata core enforces the rule by returning NULL
-        * from ata_qc_from_tag() for qcs owned by EH.
+       /*
+        * In order to synchronize EH with the regular execution path, a qc that
+        * is owned by EH is marked with ATA_QCFLAG_EH.
         *
-        * Old EH depends on ata_qc_complete() nullifying completion
-        * requests if ATA_QCFLAG_EH_SCHEDULED is set.  Old EH does
-        * not synchronize with interrupt handler.  Only PIO task is
-        * taken care of.
+        * The normal execution path is responsible for not accessing a qc owned
+        * by EH.  libata core enforces the rule by returning NULL from
+        * ata_qc_from_tag() for qcs owned by EH.
         */
-       if (ap->ops->error_handler) {
-               struct ata_device *dev = qc->dev;
-               struct ata_eh_info *ehi = &dev->link->eh_info;
-
-               if (unlikely(qc->err_mask))
-                       qc->flags |= ATA_QCFLAG_EH;
+       if (unlikely(qc->err_mask))
+               qc->flags |= ATA_QCFLAG_EH;
 
-               /*
-                * Finish internal commands without any further processing
-                * and always with the result TF filled.
-                */
-               if (unlikely(ata_tag_internal(qc->tag))) {
-                       fill_result_tf(qc);
-                       trace_ata_qc_complete_internal(qc);
-                       __ata_qc_complete(qc);
-                       return;
-               }
+       /*
+        * Finish internal commands without any further processing and always
+        * with the result TF filled.
+        */
+       if (unlikely(ata_tag_internal(qc->tag))) {
+               fill_result_tf(qc);
+               trace_ata_qc_complete_internal(qc);
+               __ata_qc_complete(qc);
+               return;
+       }
 
-               /*
-                * Non-internal qc has failed.  Fill the result TF and
-                * summon EH.
-                */
-               if (unlikely(qc->flags & ATA_QCFLAG_EH)) {
-                       fill_result_tf(qc);
-                       trace_ata_qc_complete_failed(qc);
-                       ata_qc_schedule_eh(qc);
-                       return;
-               }
+       /* Non-internal qc has failed.  Fill the result TF and summon EH. */
+       if (unlikely(qc->flags & ATA_QCFLAG_EH)) {
+               fill_result_tf(qc);
+               trace_ata_qc_complete_failed(qc);
+               ata_qc_schedule_eh(qc);
+               return;
+       }
 
-               WARN_ON_ONCE(ata_port_is_frozen(ap));
+       WARN_ON_ONCE(ata_port_is_frozen(ap));
 
-               /* read result TF if requested */
-               if (qc->flags & ATA_QCFLAG_RESULT_TF)
-                       fill_result_tf(qc);
+       /* read result TF if requested */
+       if (qc->flags & ATA_QCFLAG_RESULT_TF)
+               fill_result_tf(qc);
 
-               trace_ata_qc_complete_done(qc);
+       trace_ata_qc_complete_done(qc);
 
+       /*
+        * For CDL commands that completed without an error, check if we have
+        * sense data (ATA_SENSE is set). If we do, then the command may have
+        * been aborted by the device due to a limit timeout using the policy
+        * 0xD. For these commands, invoke EH to get the command sense data.
+        */
+       if (qc->result_tf.status & ATA_SENSE &&
+           ((ata_is_ncq(qc->tf.protocol) &&
+             dev->flags & ATA_DFLAG_CDL_ENABLED) ||
+            (!ata_is_ncq(qc->tf.protocol) &&
+             ata_id_sense_reporting_enabled(dev->id)))) {
                /*
-                * For CDL commands that completed without an error, check if
-                * we have sense data (ATA_SENSE is set). If we do, then the
-                * command may have been aborted by the device due to a limit
-                * timeout using the policy 0xD. For these commands, invoke EH
-                * to get the command sense data.
+                * Tell SCSI EH to not overwrite scmd->result even if this
+                * command is finished with result SAM_STAT_GOOD.
                 */
-               if (qc->result_tf.status & ATA_SENSE &&
-                   ((ata_is_ncq(qc->tf.protocol) &&
-                     dev->flags & ATA_DFLAG_CDL_ENABLED) ||
-                    (!ata_is_ncq(qc->tf.protocol) &&
-                     ata_id_sense_reporting_enabled(dev->id)))) {
-                       /*
-                        * Tell SCSI EH to not overwrite scmd->result even if
-                        * this command is finished with result SAM_STAT_GOOD.
-                        */
-                       qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS;
-                       qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD;
-                       ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE;
+               qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS;
+               qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD;
+               ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE;
 
-                       /*
-                        * set pending so that ata_qc_schedule_eh() does not
-                        * trigger fast drain, and freeze the port.
-                        */
-                       ap->pflags |= ATA_PFLAG_EH_PENDING;
-                       ata_qc_schedule_eh(qc);
-                       return;
-               }
-
-               /* Some commands need post-processing after successful
-                * completion.
+               /*
+                * set pending so that ata_qc_schedule_eh() does not trigger
+                * fast drain, and freeze the port.
                 */
-               switch (qc->tf.command) {
-               case ATA_CMD_SET_FEATURES:
-                       if (qc->tf.feature != SETFEATURES_WC_ON &&
-                           qc->tf.feature != SETFEATURES_WC_OFF &&
-                           qc->tf.feature != SETFEATURES_RA_ON &&
-                           qc->tf.feature != SETFEATURES_RA_OFF)
-                               break;
-                       fallthrough;
-               case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */
-               case ATA_CMD_SET_MULTI: /* multi_count changed */
-                       /* revalidate device */
-                       ehi->dev_action[dev->devno] |= ATA_EH_REVALIDATE;
-                       ata_port_schedule_eh(ap);
-                       break;
+               ap->pflags |= ATA_PFLAG_EH_PENDING;
+               ata_qc_schedule_eh(qc);
+               return;
+       }
 
-               case ATA_CMD_SLEEP:
-                       dev->flags |= ATA_DFLAG_SLEEPING;
+       /* Some commands need post-processing after successful completion. */
+       switch (qc->tf.command) {
+       case ATA_CMD_SET_FEATURES:
+               if (qc->tf.feature != SETFEATURES_WC_ON &&
+                   qc->tf.feature != SETFEATURES_WC_OFF &&
+                   qc->tf.feature != SETFEATURES_RA_ON &&
+                   qc->tf.feature != SETFEATURES_RA_OFF)
                        break;
-               }
-
-               if (unlikely(dev->flags & ATA_DFLAG_DUBIOUS_XFER))
-                       ata_verify_xfer(qc);
+               fallthrough;
+       case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */
+       case ATA_CMD_SET_MULTI: /* multi_count changed */
+               /* revalidate device */
+               ehi->dev_action[dev->devno] |= ATA_EH_REVALIDATE;
+               ata_port_schedule_eh(ap);
+               break;
 
-               __ata_qc_complete(qc);
-       } else {
-               if (qc->flags & ATA_QCFLAG_EH_SCHEDULED)
-                       return;
+       case ATA_CMD_SLEEP:
+               dev->flags |= ATA_DFLAG_SLEEPING;
+               break;
+       }
 
-               /* read result TF if failed or requested */
-               if (qc->err_mask || qc->flags & ATA_QCFLAG_RESULT_TF)
-                       fill_result_tf(qc);
+       if (unlikely(dev->flags & ATA_DFLAG_DUBIOUS_XFER))
+               ata_verify_xfer(qc);
 
-               __ata_qc_complete(qc);
-       }
+       __ata_qc_complete(qc);
 }
 EXPORT_SYMBOL_GPL(ata_qc_complete);
 
@@ -5039,11 +4875,8 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
        struct ata_link *link = qc->dev->link;
        u8 prot = qc->tf.protocol;
 
-       /* Make sure only one non-NCQ command is outstanding.  The
-        * check is skipped for old EH because it reuses active qc to
-        * request ATAPI sense.
-        */
-       WARN_ON_ONCE(ap->ops->error_handler && ata_tag_valid(link->active_tag));
+       /* Make sure only one non-NCQ command is outstanding. */
+       WARN_ON_ONCE(ata_tag_valid(link->active_tag));
 
        if (ata_is_ncq(prot)) {
                WARN_ON_ONCE(link->sactive & (1 << qc->hw_tag));
@@ -5896,7 +5729,7 @@ void ata_host_init(struct ata_host *host, struct device *dev,
 }
 EXPORT_SYMBOL_GPL(ata_host_init);
 
-void __ata_port_probe(struct ata_port *ap)
+void ata_port_probe(struct ata_port *ap)
 {
        struct ata_eh_info *ehi = &ap->link.eh_info;
        unsigned long flags;
@@ -5914,20 +5747,7 @@ void __ata_port_probe(struct ata_port *ap)
 
        spin_unlock_irqrestore(ap->lock, flags);
 }
-
-int ata_port_probe(struct ata_port *ap)
-{
-       int rc = 0;
-
-       if (ap->ops->error_handler) {
-               __ata_port_probe(ap);
-               ata_port_wait_eh(ap);
-       } else {
-               rc = ata_bus_probe(ap);
-       }
-       return rc;
-}
-
+EXPORT_SYMBOL_GPL(ata_port_probe);
 
 static void async_port_probe(void *data, async_cookie_t cookie)
 {
@@ -5943,7 +5763,8 @@ static void async_port_probe(void *data, async_cookie_t cookie)
        if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0)
                async_synchronize_cookie(cookie);
 
-       (void)ata_port_probe(ap);
+       ata_port_probe(ap);
+       ata_port_wait_eh(ap);
 
        /* in order to keep device order, we need to synchronize at this point */
        async_synchronize_cookie(cookie);
@@ -6130,9 +5951,6 @@ static void ata_port_detach(struct ata_port *ap)
        struct ata_link *link;
        struct ata_device *dev;
 
-       if (!ap->ops->error_handler)
-               goto skip_eh;
-
        /* tell EH we're leaving & flush EH */
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags |= ATA_PFLAG_UNLOADING;
@@ -6148,7 +5966,6 @@ static void ata_port_detach(struct ata_port *ap)
        cancel_delayed_work_sync(&ap->hotplug_task);
        cancel_delayed_work_sync(&ap->scsi_rescan_task);
 
- skip_eh:
        /* clean up zpodd on port removal */
        ata_for_each_link(link, ap, HOST_FIRST) {
                ata_for_each_dev(dev, link, ALL) {
@@ -6684,7 +6501,7 @@ EXPORT_SYMBOL_GPL(ata_msleep);
  *     The final register value.
  */
 u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val,
-                     unsigned long interval, unsigned long timeout)
+                     unsigned int interval, unsigned int timeout)
 {
        unsigned long deadline;
        u32 tmp;
index 35e0367..159ba6b 100644 (file)
@@ -78,12 +78,12 @@ enum {
  * are mostly for error handling, hotplug and those outlier devices that
  * take an exceptionally long time to recover from reset.
  */
-static const unsigned long ata_eh_reset_timeouts[] = {
+static const unsigned int ata_eh_reset_timeouts[] = {
        10000,  /* most drives spin up by 10sec */
        10000,  /* > 99% working drives spin up before 20sec */
        35000,  /* give > 30 secs of idleness for outlier devices */
         5000,  /* and sweet one last chance */
-       ULONG_MAX, /* > 1 min has elapsed, give up */
+       UINT_MAX, /* > 1 min has elapsed, give up */
 };
 
 static const unsigned int ata_eh_identify_timeouts[] = {
@@ -571,13 +571,10 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
        /* make sure sff pio task is not running */
        ata_sff_flush_pio_task(ap);
 
-       if (!ap->ops->error_handler)
-               return;
-
        /* synchronize with host lock and sort out timeouts */
 
        /*
-        * For new EH, all qcs are finished in one of three ways -
+        * For EH, all qcs are finished in one of three ways -
         * normal completion, error completion, and SCSI timeout.
         * Both completions can race against SCSI timeout.  When normal
         * completion wins, the qc never reaches EH.  When error
@@ -659,94 +656,87 @@ EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
 void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
 {
        unsigned long flags;
+       struct ata_link *link;
 
-       /* invoke error handler */
-       if (ap->ops->error_handler) {
-               struct ata_link *link;
-
-               /* acquire EH ownership */
-               ata_eh_acquire(ap);
+       /* acquire EH ownership */
+       ata_eh_acquire(ap);
  repeat:
-               /* kill fast drain timer */
-               del_timer_sync(&ap->fastdrain_timer);
+       /* kill fast drain timer */
+       del_timer_sync(&ap->fastdrain_timer);
 
-               /* process port resume request */
-               ata_eh_handle_port_resume(ap);
+       /* process port resume request */
+       ata_eh_handle_port_resume(ap);
 
-               /* fetch & clear EH info */
-               spin_lock_irqsave(ap->lock, flags);
+       /* fetch & clear EH info */
+       spin_lock_irqsave(ap->lock, flags);
 
-               ata_for_each_link(link, ap, HOST_FIRST) {
-                       struct ata_eh_context *ehc = &link->eh_context;
-                       struct ata_device *dev;
+       ata_for_each_link(link, ap, HOST_FIRST) {
+               struct ata_eh_context *ehc = &link->eh_context;
+               struct ata_device *dev;
 
-                       memset(&link->eh_context, 0, sizeof(link->eh_context));
-                       link->eh_context.i = link->eh_info;
-                       memset(&link->eh_info, 0, sizeof(link->eh_info));
+               memset(&link->eh_context, 0, sizeof(link->eh_context));
+               link->eh_context.i = link->eh_info;
+               memset(&link->eh_info, 0, sizeof(link->eh_info));
 
-                       ata_for_each_dev(dev, link, ENABLED) {
-                               int devno = dev->devno;
+               ata_for_each_dev(dev, link, ENABLED) {
+                       int devno = dev->devno;
 
-                               ehc->saved_xfer_mode[devno] = dev->xfer_mode;
-                               if (ata_ncq_enabled(dev))
-                                       ehc->saved_ncq_enabled |= 1 << devno;
-                       }
+                       ehc->saved_xfer_mode[devno] = dev->xfer_mode;
+                       if (ata_ncq_enabled(dev))
+                               ehc->saved_ncq_enabled |= 1 << devno;
                }
+       }
 
-               ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
-               ap->pflags &= ~ATA_PFLAG_EH_PENDING;
-               ap->excl_link = NULL;   /* don't maintain exclusion over EH */
+       ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
+       ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+       ap->excl_link = NULL;   /* don't maintain exclusion over EH */
 
-               spin_unlock_irqrestore(ap->lock, flags);
+       spin_unlock_irqrestore(ap->lock, flags);
 
-               /* invoke EH, skip if unloading or suspended */
-               if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
-                       ap->ops->error_handler(ap);
-               else {
-                       /* if unloading, commence suicide */
-                       if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
-                           !(ap->pflags & ATA_PFLAG_UNLOADED))
-                               ata_eh_unload(ap);
-                       ata_eh_finish(ap);
-               }
+       /* invoke EH, skip if unloading or suspended */
+       if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
+               ap->ops->error_handler(ap);
+       else {
+               /* if unloading, commence suicide */
+               if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
+                   !(ap->pflags & ATA_PFLAG_UNLOADED))
+                       ata_eh_unload(ap);
+               ata_eh_finish(ap);
+       }
 
-               /* process port suspend request */
-               ata_eh_handle_port_suspend(ap);
+       /* process port suspend request */
+       ata_eh_handle_port_suspend(ap);
 
-               /* Exception might have happened after ->error_handler
-                * recovered the port but before this point.  Repeat
-                * EH in such case.
-                */
-               spin_lock_irqsave(ap->lock, flags);
+       /*
+        * Exception might have happened after ->error_handler recovered the
+        * port but before this point.  Repeat EH in such case.
+        */
+       spin_lock_irqsave(ap->lock, flags);
 
-               if (ap->pflags & ATA_PFLAG_EH_PENDING) {
-                       if (--ap->eh_tries) {
-                               spin_unlock_irqrestore(ap->lock, flags);
-                               goto repeat;
-                       }
-                       ata_port_err(ap,
-                                    "EH pending after %d tries, giving up\n",
-                                    ATA_EH_MAX_TRIES);
-                       ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+       if (ap->pflags & ATA_PFLAG_EH_PENDING) {
+               if (--ap->eh_tries) {
+                       spin_unlock_irqrestore(ap->lock, flags);
+                       goto repeat;
                }
+               ata_port_err(ap,
+                            "EH pending after %d tries, giving up\n",
+                            ATA_EH_MAX_TRIES);
+               ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+       }
 
-               /* this run is complete, make sure EH info is clear */
-               ata_for_each_link(link, ap, HOST_FIRST)
-                       memset(&link->eh_info, 0, sizeof(link->eh_info));
+       /* this run is complete, make sure EH info is clear */
+       ata_for_each_link(link, ap, HOST_FIRST)
+               memset(&link->eh_info, 0, sizeof(link->eh_info));
 
-               /* end eh (clear host_eh_scheduled) while holding
-                * ap->lock such that if exception occurs after this
-                * point but before EH completion, SCSI midlayer will
-                * re-initiate EH.
-                */
-               ap->ops->end_eh(ap);
+       /*
+        * end eh (clear host_eh_scheduled) while holding ap->lock such that if
+        * exception occurs after this point but before EH completion, SCSI
+        * midlayer will re-initiate EH.
+        */
+       ap->ops->end_eh(ap);
 
-               spin_unlock_irqrestore(ap->lock, flags);
-               ata_eh_release(ap);
-       } else {
-               WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
-               ap->ops->eng_timeout(ap);
-       }
+       spin_unlock_irqrestore(ap->lock, flags);
+       ata_eh_release(ap);
 
        scsi_eh_flush_done_q(&ap->eh_done_q);
 
@@ -912,8 +902,6 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
 {
        struct ata_port *ap = qc->ap;
 
-       WARN_ON(!ap->ops->error_handler);
-
        qc->flags |= ATA_QCFLAG_EH;
        ata_eh_set_pending(ap, 1);
 
@@ -934,8 +922,6 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
  */
 void ata_std_sched_eh(struct ata_port *ap)
 {
-       WARN_ON(!ap->ops->error_handler);
-
        if (ap->pflags & ATA_PFLAG_INITIALIZING)
                return;
 
@@ -989,8 +975,6 @@ static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
        struct ata_queued_cmd *qc;
        int tag, nr_aborted = 0;
 
-       WARN_ON(!ap->ops->error_handler);
-
        /* we're gonna abort all commands, no need for fast drain */
        ata_eh_set_pending(ap, 0);
 
@@ -1065,8 +1049,6 @@ EXPORT_SYMBOL_GPL(ata_port_abort);
  */
 static void __ata_port_freeze(struct ata_port *ap)
 {
-       WARN_ON(!ap->ops->error_handler);
-
        if (ap->ops->freeze)
                ap->ops->freeze(ap);
 
@@ -1091,8 +1073,6 @@ static void __ata_port_freeze(struct ata_port *ap)
  */
 int ata_port_freeze(struct ata_port *ap)
 {
-       WARN_ON(!ap->ops->error_handler);
-
        __ata_port_freeze(ap);
 
        return ata_port_abort(ap);
@@ -1112,9 +1092,6 @@ void ata_eh_freeze_port(struct ata_port *ap)
 {
        unsigned long flags;
 
-       if (!ap->ops->error_handler)
-               return;
-
        spin_lock_irqsave(ap->lock, flags);
        __ata_port_freeze(ap);
        spin_unlock_irqrestore(ap->lock, flags);
@@ -1134,9 +1111,6 @@ void ata_eh_thaw_port(struct ata_port *ap)
 {
        unsigned long flags;
 
-       if (!ap->ops->error_handler)
-               return;
-
        spin_lock_irqsave(ap->lock, flags);
 
        ap->pflags &= ~ATA_PFLAG_FROZEN;
@@ -2575,7 +2549,7 @@ int ata_eh_reset(struct ata_link *link, int classify,
        /*
         * Prepare to reset
         */
-       while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
+       while (ata_eh_reset_timeouts[max_tries] != UINT_MAX)
                max_tries++;
        if (link->flags & ATA_LFLAG_RST_ONCE)
                max_tries = 1;
index 85e279a..5d31c08 100644 (file)
 #include "libata-transport.h"
 
 /* debounce timing parameters in msecs { interval, duration, timeout } */
-const unsigned long sata_deb_timing_normal[]           = {   5,  100, 2000 };
+const unsigned int sata_deb_timing_normal[]            = {   5,  100, 2000 };
 EXPORT_SYMBOL_GPL(sata_deb_timing_normal);
-const unsigned long sata_deb_timing_hotplug[]          = {  25,  500, 2000 };
+const unsigned int sata_deb_timing_hotplug[]           = {  25,  500, 2000 };
 EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug);
-const unsigned long sata_deb_timing_long[]             = { 100, 2000, 5000 };
+const unsigned int sata_deb_timing_long[]              = { 100, 2000, 5000 };
 EXPORT_SYMBOL_GPL(sata_deb_timing_long);
 
 /**
@@ -232,11 +232,11 @@ EXPORT_SYMBOL_GPL(ata_tf_from_fis);
  *     RETURNS:
  *     0 on success, -errno on failure.
  */
-int sata_link_debounce(struct ata_link *link, const unsigned long *params,
+int sata_link_debounce(struct ata_link *link, const unsigned int *params,
                       unsigned long deadline)
 {
-       unsigned long interval = params[0];
-       unsigned long duration = params[1];
+       unsigned int interval = params[0];
+       unsigned int duration = params[1];
        unsigned long last_jiffies, t;
        u32 last, cur;
        int rc;
@@ -295,7 +295,7 @@ EXPORT_SYMBOL_GPL(sata_link_debounce);
  *     RETURNS:
  *     0 on success, -errno on failure.
  */
-int sata_link_resume(struct ata_link *link, const unsigned long *params,
+int sata_link_resume(struct ata_link *link, const unsigned int *params,
                     unsigned long deadline)
 {
        int tries = ATA_LINK_RESUME_TRIES;
@@ -528,7 +528,7 @@ EXPORT_SYMBOL_GPL(sata_set_spd);
  *     RETURNS:
  *     0 on success, -errno otherwise.
  */
-int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
+int sata_link_hardreset(struct ata_link *link, const unsigned int *timing,
                        unsigned long deadline,
                        bool *online, int (*check_ready)(struct ata_link *))
 {
@@ -1139,92 +1139,12 @@ struct ata_port *ata_sas_port_alloc(struct ata_host *host,
        ap->flags |= port_info->flags;
        ap->ops = port_info->port_ops;
        ap->cbl = ATA_CBL_SATA;
+       ap->print_id = atomic_inc_return(&ata_print_id);
 
        return ap;
 }
 EXPORT_SYMBOL_GPL(ata_sas_port_alloc);
 
-/**
- *     ata_sas_port_start - Set port up for dma.
- *     @ap: Port to initialize
- *
- *     Called just after data structures for each port are
- *     initialized.
- *
- *     May be used as the port_start() entry in ata_port_operations.
- *
- *     LOCKING:
- *     Inherited from caller.
- */
-int ata_sas_port_start(struct ata_port *ap)
-{
-       /*
-        * the port is marked as frozen at allocation time, but if we don't
-        * have new eh, we won't thaw it
-        */
-       if (!ap->ops->error_handler)
-               ap->pflags &= ~ATA_PFLAG_FROZEN;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_start);
-
-/**
- *     ata_sas_port_stop - Undo ata_sas_port_start()
- *     @ap: Port to shut down
- *
- *     May be used as the port_stop() entry in ata_port_operations.
- *
- *     LOCKING:
- *     Inherited from caller.
- */
-
-void ata_sas_port_stop(struct ata_port *ap)
-{
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_stop);
-
-/**
- * ata_sas_async_probe - simply schedule probing and return
- * @ap: Port to probe
- *
- * For batch scheduling of probe for sas attached ata devices, assumes
- * the port has already been through ata_sas_port_init()
- */
-void ata_sas_async_probe(struct ata_port *ap)
-{
-       __ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_async_probe);
-
-int ata_sas_sync_probe(struct ata_port *ap)
-{
-       return ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_sync_probe);
-
-
-/**
- *     ata_sas_port_init - Initialize a SATA device
- *     @ap: SATA port to initialize
- *
- *     LOCKING:
- *     PCI/etc. bus probe sem.
- *
- *     RETURNS:
- *     Zero on success, non-zero on error.
- */
-
-int ata_sas_port_init(struct ata_port *ap)
-{
-       int rc = ap->ops->port_start(ap);
-
-       if (rc)
-               return rc;
-       ap->print_id = atomic_inc_return(&ata_print_id);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_init);
-
 int ata_sas_tport_add(struct device *parent, struct ata_port *ap)
 {
        return ata_tport_add(parent, ap);
@@ -1238,20 +1158,6 @@ void ata_sas_tport_delete(struct ata_port *ap)
 EXPORT_SYMBOL_GPL(ata_sas_tport_delete);
 
 /**
- *     ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc
- *     @ap: SATA port to destroy
- *
- */
-
-void ata_sas_port_destroy(struct ata_port *ap)
-{
-       if (ap->ops->port_stop)
-               ap->ops->port_stop(ap);
-       kfree(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_destroy);
-
-/**
  *     ata_sas_slave_configure - Default slave_config routine for libata devices
  *     @sdev: SCSI device to configure
  *     @ap: ATA port to which SCSI device is attached
index c6ece32..d3f28b8 100644 (file)
@@ -135,11 +135,11 @@ static ssize_t ata_scsi_park_store(struct device *device,
        struct scsi_device *sdev = to_scsi_device(device);
        struct ata_port *ap;
        struct ata_device *dev;
-       long int input;
+       int input;
        unsigned long flags;
        int rc;
 
-       rc = kstrtol(buf, 10, &input);
+       rc = kstrtoint(buf, 10, &input);
        if (rc)
                return rc;
        if (input < -2)
@@ -710,47 +710,6 @@ static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc)
 }
 
 /**
- *     ata_dump_status - user friendly display of error info
- *     @ap: the port in question
- *     @tf: ptr to filled out taskfile
- *
- *     Decode and dump the ATA error/status registers for the user so
- *     that they have some idea what really happened at the non
- *     make-believe layer.
- *
- *     LOCKING:
- *     inherited from caller
- */
-static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf)
-{
-       u8 stat = tf->status, err = tf->error;
-
-       if (stat & ATA_BUSY) {
-               ata_port_warn(ap, "status=0x%02x {Busy} ", stat);
-       } else {
-               ata_port_warn(ap, "status=0x%02x { %s%s%s%s%s%s%s} ", stat,
-                             stat & ATA_DRDY ? "DriveReady " : "",
-                             stat & ATA_DF ? "DeviceFault " : "",
-                             stat & ATA_DSC ? "SeekComplete " : "",
-                             stat & ATA_DRQ ? "DataRequest " : "",
-                             stat & ATA_CORR ? "CorrectedError " : "",
-                             stat & ATA_SENSE ? "Sense " : "",
-                             stat & ATA_ERR ? "Error " : "");
-               if (err)
-                       ata_port_warn(ap, "error=0x%02x {%s%s%s%s%s%s", err,
-                                     err & ATA_ABORTED ?
-                                     "DriveStatusError " : "",
-                                     err & ATA_ICRC ?
-                                     (err & ATA_ABORTED ?
-                                      "BadCRC " : "Sector ") : "",
-                                     err & ATA_UNC ? "UncorrectableError " : "",
-                                     err & ATA_IDNF ? "SectorIdNotFound " : "",
-                                     err & ATA_TRK0NF ? "TrackZeroNotFound " : "",
-                                     err & ATA_AMNF ? "AddrMarkNotFound " : "");
-       }
-}
-
-/**
  *     ata_to_sense_error - convert ATA error to SCSI error
  *     @id: ATA device number
  *     @drv_stat: value contained in ATA status register
@@ -758,7 +717,6 @@ static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf)
  *     @sk: the sense key we'll fill out
  *     @asc: the additional sense code we'll fill out
  *     @ascq: the additional sense code qualifier we'll fill out
- *     @verbose: be verbose
  *
  *     Converts an ATA error into a SCSI error.  Fill out pointers to
  *     SK, ASC, and ASCQ bytes for later use in fixed or descriptor
@@ -768,7 +726,7 @@ static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf)
  *     spin_lock_irqsave(host lock)
  */
 static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
-                              u8 *asc, u8 *ascq, int verbose)
+                              u8 *asc, u8 *ascq)
 {
        int i;
 
@@ -847,7 +805,7 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
                                *sk = sense_table[i][1];
                                *asc = sense_table[i][2];
                                *ascq = sense_table[i][3];
-                               goto translate_done;
+                               return;
                        }
                }
        }
@@ -862,7 +820,7 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
                        *sk = stat_table[i][1];
                        *asc = stat_table[i][2];
                        *ascq = stat_table[i][3];
-                       goto translate_done;
+                       return;
                }
        }
 
@@ -873,12 +831,6 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
        *sk = ABORTED_COMMAND;
        *asc = 0x00;
        *ascq = 0x00;
-
- translate_done:
-       if (verbose)
-               pr_err("ata%u: translated ATA stat/err 0x%02x/%02x to SCSI SK/ASC/ASCQ 0x%x/%02x/%02x\n",
-                      id, drv_stat, drv_err, *sk, *asc, *ascq);
-       return;
 }
 
 /*
@@ -904,7 +856,6 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
        struct ata_taskfile *tf = &qc->result_tf;
        unsigned char *sb = cmd->sense_buffer;
        unsigned char *desc = sb + 8;
-       int verbose = qc->ap->ops->error_handler == NULL;
        u8 sense_key, asc, ascq;
 
        memset(sb, 0, SCSI_SENSE_BUFFERSIZE);
@@ -916,7 +867,7 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc)
        if (qc->err_mask ||
            tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
                ata_to_sense_error(qc->ap->print_id, tf->status, tf->error,
-                                  &sense_key, &asc, &ascq, verbose);
+                                  &sense_key, &asc, &ascq);
                ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
        } else {
                /*
@@ -999,7 +950,6 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc)
        struct scsi_cmnd *cmd = qc->scsicmd;
        struct ata_taskfile *tf = &qc->result_tf;
        unsigned char *sb = cmd->sense_buffer;
-       int verbose = qc->ap->ops->error_handler == NULL;
        u64 block;
        u8 sense_key, asc, ascq;
 
@@ -1017,7 +967,7 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc)
        if (qc->err_mask ||
            tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
                ata_to_sense_error(qc->ap->print_id, tf->status, tf->error,
-                                  &sense_key, &asc, &ascq, verbose);
+                                  &sense_key, &asc, &ascq);
                ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq);
        } else {
                /* Could not decode error */
@@ -1186,9 +1136,6 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev)
        unsigned long flags;
        struct ata_device *dev;
 
-       if (!ap->ops->error_handler)
-               return;
-
        spin_lock_irqsave(ap->lock, flags);
        dev = __ata_scsi_find_dev(ap, sdev);
        if (dev && dev->sdev) {
@@ -1675,7 +1622,6 @@ static void ata_qc_done(struct ata_queued_cmd *qc)
 
 static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
 {
-       struct ata_port *ap = qc->ap;
        struct scsi_cmnd *cmd = qc->scsicmd;
        u8 *cdb = cmd->cmnd;
        int need_sense = (qc->err_mask != 0) &&
@@ -1699,9 +1645,6 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
                /* Keep the SCSI ML and status byte, clear host byte. */
                cmd->result &= 0x0000ffff;
 
-       if (need_sense && !ap->ops->error_handler)
-               ata_dump_status(ap, &qc->result_tf);
-
        ata_qc_done(qc);
 }
 
@@ -2608,71 +2551,6 @@ static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf)
        return 0;
 }
 
-static void atapi_sense_complete(struct ata_queued_cmd *qc)
-{
-       if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0)) {
-               /* FIXME: not quite right; we don't want the
-                * translation of taskfile registers into
-                * a sense descriptors, since that's only
-                * correct for ATA, not ATAPI
-                */
-               ata_gen_passthru_sense(qc);
-       }
-
-       ata_qc_done(qc);
-}
-
-/* is it pointless to prefer PIO for "safety reasons"? */
-static inline int ata_pio_use_silly(struct ata_port *ap)
-{
-       return (ap->flags & ATA_FLAG_PIO_DMA);
-}
-
-static void atapi_request_sense(struct ata_queued_cmd *qc)
-{
-       struct ata_port *ap = qc->ap;
-       struct scsi_cmnd *cmd = qc->scsicmd;
-
-       memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
-
-#ifdef CONFIG_ATA_SFF
-       if (ap->ops->sff_tf_read)
-               ap->ops->sff_tf_read(ap, &qc->tf);
-#endif
-
-       /* fill these in, for the case where they are -not- overwritten */
-       cmd->sense_buffer[0] = 0x70;
-       cmd->sense_buffer[2] = qc->tf.error >> 4;
-
-       ata_qc_reinit(qc);
-
-       /* setup sg table and init transfer direction */
-       sg_init_one(&qc->sgent, cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
-       ata_sg_init(qc, &qc->sgent, 1);
-       qc->dma_dir = DMA_FROM_DEVICE;
-
-       memset(&qc->cdb, 0, qc->dev->cdb_len);
-       qc->cdb[0] = REQUEST_SENSE;
-       qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
-
-       qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
-       qc->tf.command = ATA_CMD_PACKET;
-
-       if (ata_pio_use_silly(ap)) {
-               qc->tf.protocol = ATAPI_PROT_DMA;
-               qc->tf.feature |= ATAPI_PKT_DMA;
-       } else {
-               qc->tf.protocol = ATAPI_PROT_PIO;
-               qc->tf.lbam = SCSI_SENSE_BUFFERSIZE;
-               qc->tf.lbah = 0;
-       }
-       qc->nbytes = SCSI_SENSE_BUFFERSIZE;
-
-       qc->complete_fn = atapi_sense_complete;
-
-       ata_qc_issue(qc);
-}
-
 /*
  * ATAPI devices typically report zero for their SCSI version, and sometimes
  * deviate from the spec WRT response data format.  If SCSI version is
@@ -2698,9 +2576,8 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
        struct scsi_cmnd *cmd = qc->scsicmd;
        unsigned int err_mask = qc->err_mask;
 
-       /* handle completion from new EH */
-       if (unlikely(qc->ap->ops->error_handler &&
-                    (err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID))) {
+       /* handle completion from EH */
+       if (unlikely(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID)) {
 
                if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
                        /* FIXME: not quite right; we don't want the
@@ -2732,23 +2609,10 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
                return;
        }
 
-       /* successful completion or old EH failure path */
-       if (unlikely(err_mask & AC_ERR_DEV)) {
-               cmd->result = SAM_STAT_CHECK_CONDITION;
-               atapi_request_sense(qc);
-               return;
-       } else if (unlikely(err_mask)) {
-               /* FIXME: not quite right; we don't want the
-                * translation of taskfile registers into
-                * a sense descriptors, since that's only
-                * correct for ATA, not ATAPI
-                */
-               ata_gen_passthru_sense(qc);
-       } else {
-               if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0)
-                       atapi_fixup_inquiry(cmd);
-               cmd->result = SAM_STAT_GOOD;
-       }
+       /* successful completion path */
+       if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0)
+               atapi_fixup_inquiry(cmd);
+       cmd->result = SAM_STAT_GOOD;
 
        ata_qc_done(qc);
 }
@@ -4797,9 +4661,6 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
        unsigned long flags;
        int devno, rc = 0;
 
-       if (!ap->ops->error_handler)
-               return -EOPNOTSUPP;
-
        if (lun != SCAN_WILD_CARD && lun)
                return -EINVAL;
 
@@ -4891,7 +4752,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                        }
 
                        spin_unlock_irqrestore(ap->lock, flags);
-                       scsi_rescan_device(&(sdev->sdev_gendev));
+                       scsi_rescan_device(sdev);
                        scsi_device_put(sdev);
                        spin_lock_irqsave(ap->lock, flags);
                }
index 9d28bad..8fcc622 100644 (file)
@@ -883,31 +883,21 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
 {
        struct ata_port *ap = qc->ap;
 
-       if (ap->ops->error_handler) {
-               if (in_wq) {
-                       /* EH might have kicked in while host lock is
-                        * released.
-                        */
-                       qc = ata_qc_from_tag(ap, qc->tag);
-                       if (qc) {
-                               if (likely(!(qc->err_mask & AC_ERR_HSM))) {
-                                       ata_sff_irq_on(ap);
-                                       ata_qc_complete(qc);
-                               } else
-                                       ata_port_freeze(ap);
-                       }
-               } else {
-                       if (likely(!(qc->err_mask & AC_ERR_HSM)))
+       if (in_wq) {
+               /* EH might have kicked in while host lock is released. */
+               qc = ata_qc_from_tag(ap, qc->tag);
+               if (qc) {
+                       if (likely(!(qc->err_mask & AC_ERR_HSM))) {
+                               ata_sff_irq_on(ap);
                                ata_qc_complete(qc);
-                       else
+                       } else
                                ata_port_freeze(ap);
                }
        } else {
-               if (in_wq) {
-                       ata_sff_irq_on(ap);
-                       ata_qc_complete(qc);
-               } else
+               if (likely(!(qc->err_mask & AC_ERR_HSM)))
                        ata_qc_complete(qc);
+               else
+                       ata_port_freeze(ap);
        }
 }
 
@@ -1971,7 +1961,7 @@ int sata_sff_hardreset(struct ata_link *link, unsigned int *class,
                       unsigned long deadline)
 {
        struct ata_eh_context *ehc = &link->eh_context;
-       const unsigned long *timing = sata_ehc_deb_timing(ehc);
+       const unsigned int *timing = sata_ehc_deb_timing(ehc);
        bool online;
        int rc;
 
index cf99388..6e7d352 100644 (file)
@@ -78,8 +78,6 @@ extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg);
 extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg);
 extern struct ata_port *ata_port_alloc(struct ata_host *host);
 extern const char *sata_spd_string(unsigned int spd);
-extern int ata_port_probe(struct ata_port *ap);
-extern void __ata_port_probe(struct ata_port *ap);
 extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
                                      u8 page, void *buf, unsigned int sectors);
 
@@ -124,7 +122,6 @@ extern void ata_scsi_media_change_notify(struct ata_device *dev);
 extern void ata_scsi_hotplug(struct work_struct *work);
 extern void ata_schedule_scsi_eh(struct Scsi_Host *shost);
 extern void ata_scsi_dev_rescan(struct work_struct *work);
-extern int ata_bus_probe(struct ata_port *ap);
 extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
                              unsigned int id, u64 lun);
 void ata_scsi_sdev_config(struct scsi_device *sdev);
index 314eaa1..d0c6924 100644 (file)
@@ -917,15 +917,13 @@ static int arasan_cf_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int arasan_cf_remove(struct platform_device *pdev)
+static void arasan_cf_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct arasan_cf_dev *acdev = host->ports[0]->private_data;
 
        ata_host_detach(host);
        cf_exit(acdev);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -966,7 +964,7 @@ MODULE_DEVICE_TABLE(of, arasan_cf_id_table);
 
 static struct platform_driver arasan_cf_driver = {
        .probe          = arasan_cf_probe,
-       .remove         = arasan_cf_remove,
+       .remove_new     = arasan_cf_remove,
        .driver         = {
                .name   = DRIVER_NAME,
                .pm     = &arasan_cf_pm_ops,
index 49bc619..c36ee99 100644 (file)
@@ -27,7 +27,6 @@
 
 #include <asm/amigahw.h>
 #include <asm/amigaints.h>
-#include <asm/ide.h>
 #include <asm/setup.h>
 
 #define DRV_NAME "pata_buddha"
index c6e043e..c84a208 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/ata.h>
 #include <linux/libata.h>
 #include <linux/platform_device.h>
+#include <linux/sys_soc.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/ktime.h>
@@ -910,6 +911,12 @@ static struct ata_port_operations ep93xx_pata_port_ops = {
        .port_start             = ep93xx_pata_port_start,
 };
 
+static const struct soc_device_attribute ep93xx_soc_table[] = {
+       { .revision = "E1", .data = (void *)ATA_UDMA3 },
+       { .revision = "E2", .data = (void *)ATA_UDMA4 },
+       { /* sentinel */ }
+};
+
 static int ep93xx_pata_probe(struct platform_device *pdev)
 {
        struct ep93xx_pata_data *drv_data;
@@ -939,7 +946,7 @@ static int ep93xx_pata_probe(struct platform_device *pdev)
 
        drv_data = devm_kzalloc(&pdev->dev, sizeof(*drv_data), GFP_KERNEL);
        if (!drv_data) {
-               err = -ENXIO;
+               err = -ENOMEM;
                goto err_rel_gpio;
        }
 
@@ -952,7 +959,7 @@ static int ep93xx_pata_probe(struct platform_device *pdev)
        /* allocate host */
        host = ata_host_alloc(&pdev->dev, 1);
        if (!host) {
-               err = -ENXIO;
+               err = -ENOMEM;
                goto err_rel_dma;
        }
 
@@ -976,12 +983,11 @@ static int ep93xx_pata_probe(struct platform_device *pdev)
         * so this driver supports only UDMA modes.
         */
        if (drv_data->dma_rx_channel && drv_data->dma_tx_channel) {
-               int chip_rev = ep93xx_chip_revision();
+               const struct soc_device_attribute *match;
 
-               if (chip_rev == EP93XX_CHIP_REV_E1)
-                       ap->udma_mask = ATA_UDMA3;
-               else if (chip_rev == EP93XX_CHIP_REV_E2)
-                       ap->udma_mask = ATA_UDMA4;
+               match = soc_device_match(ep93xx_soc_table);
+               if (match)
+                       ap->udma_mask = (unsigned int) match->data;
                else
                        ap->udma_mask = ATA_UDMA2;
        }
@@ -1004,7 +1010,7 @@ err_rel_gpio:
        return err;
 }
 
-static int ep93xx_pata_remove(struct platform_device *pdev)
+static void ep93xx_pata_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct ep93xx_pata_data *drv_data = host->private_data;
@@ -1013,7 +1019,6 @@ static int ep93xx_pata_remove(struct platform_device *pdev)
        ep93xx_pata_release_dma(drv_data);
        ep93xx_pata_clear_regs(drv_data->ide_base);
        ep93xx_ide_release_gpio(pdev);
-       return 0;
 }
 
 static struct platform_driver ep93xx_pata_platform_driver = {
@@ -1021,7 +1026,7 @@ static struct platform_driver ep93xx_pata_platform_driver = {
                .name = DRV_NAME,
        },
        .probe = ep93xx_pata_probe,
-       .remove = ep93xx_pata_remove,
+       .remove_new = ep93xx_pata_remove,
 };
 
 module_platform_driver(ep93xx_pata_platform_driver);
index 996516e..0c2ae43 100644 (file)
 #include <asm/atarihw.h>
 #include <asm/atariints.h>
 #include <asm/atari_stdma.h>
-#include <asm/ide.h>
 
 #define DRV_NAME "pata_falcon"
 #define DRV_VERSION "0.1.0"
 
+static int pata_falcon_swap_mask;
+
+module_param_named(data_swab, pata_falcon_swap_mask, int, 0444);
+MODULE_PARM_DESC(data_swab, "Data byte swap enable/disable bitmap (0x1==drive1, 0x2==drive2, 0x4==drive3, 0x8==drive4, default==0)");
+
 static const struct scsi_host_template pata_falcon_sht = {
        ATA_PIO_SHT(DRV_NAME),
 };
@@ -50,7 +54,7 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc,
 
        if (dev->class == ATA_DEV_ATA && cmd &&
            !blk_rq_is_passthrough(scsi_cmd_to_rq(cmd)))
-               swap = 0;
+               swap = (uintptr_t)ap->private_data & BIT(dev->devno);
 
        /* Transfer multiple of 2 bytes */
        if (rw == READ) {
@@ -123,8 +127,9 @@ static int __init pata_falcon_init_one(struct platform_device *pdev)
        struct resource *base_res, *ctl_res, *irq_res;
        struct ata_host *host;
        struct ata_port *ap;
-       void __iomem *base;
-       int irq = 0;
+       void __iomem *base, *ctl_base;
+       int mask_shift = 0; /* Q40 & Falcon default */
+       int irq = 0, io_offset = 1, reg_shift = 2; /* Falcon defaults */
 
        dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 PATA controller\n");
 
@@ -165,26 +170,38 @@ static int __init pata_falcon_init_one(struct platform_device *pdev)
        ap->pio_mask = ATA_PIO4;
        ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY;
 
-       base = (void __iomem *)base_mem_res->start;
        /* N.B. this assumes data_addr will be used for word-sized I/O only */
-       ap->ioaddr.data_addr            = base + 0 + 0 * 4;
-       ap->ioaddr.error_addr           = base + 1 + 1 * 4;
-       ap->ioaddr.feature_addr         = base + 1 + 1 * 4;
-       ap->ioaddr.nsect_addr           = base + 1 + 2 * 4;
-       ap->ioaddr.lbal_addr            = base + 1 + 3 * 4;
-       ap->ioaddr.lbam_addr            = base + 1 + 4 * 4;
-       ap->ioaddr.lbah_addr            = base + 1 + 5 * 4;
-       ap->ioaddr.device_addr          = base + 1 + 6 * 4;
-       ap->ioaddr.status_addr          = base + 1 + 7 * 4;
-       ap->ioaddr.command_addr         = base + 1 + 7 * 4;
-
-       base = (void __iomem *)ctl_mem_res->start;
-       ap->ioaddr.altstatus_addr       = base + 1;
-       ap->ioaddr.ctl_addr             = base + 1;
-
-       ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx",
-                     (unsigned long)base_mem_res->start,
-                     (unsigned long)ctl_mem_res->start);
+       ap->ioaddr.data_addr = (void __iomem *)base_mem_res->start;
+
+       if (base_res) {         /* only Q40 has IO resources */
+               io_offset = 0x10000;
+               reg_shift = 0;
+               base = (void __iomem *)base_res->start;
+               ctl_base = (void __iomem *)ctl_res->start;
+       } else {
+               base = (void __iomem *)base_mem_res->start;
+               ctl_base = (void __iomem *)ctl_mem_res->start;
+       }
+
+       ap->ioaddr.error_addr   = base + io_offset + (1 << reg_shift);
+       ap->ioaddr.feature_addr = base + io_offset + (1 << reg_shift);
+       ap->ioaddr.nsect_addr   = base + io_offset + (2 << reg_shift);
+       ap->ioaddr.lbal_addr    = base + io_offset + (3 << reg_shift);
+       ap->ioaddr.lbam_addr    = base + io_offset + (4 << reg_shift);
+       ap->ioaddr.lbah_addr    = base + io_offset + (5 << reg_shift);
+       ap->ioaddr.device_addr  = base + io_offset + (6 << reg_shift);
+       ap->ioaddr.status_addr  = base + io_offset + (7 << reg_shift);
+       ap->ioaddr.command_addr = base + io_offset + (7 << reg_shift);
+
+       ap->ioaddr.altstatus_addr       = ctl_base + io_offset;
+       ap->ioaddr.ctl_addr             = ctl_base + io_offset;
+
+       ata_port_desc(ap, "cmd %px ctl %px data %px",
+                     base, ctl_base, ap->ioaddr.data_addr);
+
+       if (pdev->id > 0)
+               mask_shift = 2;
+       ap->private_data = (void *)(uintptr_t)(pata_falcon_swap_mask >> mask_shift);
 
        irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
        if (irq_res && irq_res->start > 0) {
index 6f6734c..4d6ef90 100644 (file)
@@ -14,8 +14,7 @@
 #include <linux/module.h>
 #include <linux/libata.h>
 #include <linux/bitops.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/clk.h>
 #include "sata_gemini.h"
 
@@ -470,11 +469,7 @@ static int pata_ftide010_probe(struct platform_device *pdev)
        if (irq < 0)
                return irq;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
-       ftide->base = devm_ioremap_resource(dev, res);
+       ftide->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(ftide->base))
                return PTR_ERR(ftide->base);
 
@@ -541,15 +536,13 @@ err_dis_clk:
        return ret;
 }
 
-static int pata_ftide010_remove(struct platform_device *pdev)
+static void pata_ftide010_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct ftide010 *ftide = host->private_data;
 
        ata_host_detach(ftide->host);
        clk_disable_unprepare(ftide->pclk);
-
-       return 0;
 }
 
 static const struct of_device_id pata_ftide010_of_match[] = {
@@ -563,10 +556,11 @@ static struct platform_driver pata_ftide010_driver = {
                .of_match_table = pata_ftide010_of_match,
        },
        .probe = pata_ftide010_probe,
-       .remove = pata_ftide010_remove,
+       .remove_new = pata_ftide010_remove,
 };
 module_platform_driver(pata_ftide010_driver);
 
+MODULE_DESCRIPTION("low level driver for Faraday Technology FTIDE010");
 MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" DRV_NAME);
index e5aa07f..3bdbe2b 100644 (file)
@@ -27,7 +27,6 @@
 #include <asm/amigahw.h>
 #include <asm/amigaints.h>
 #include <asm/amigayle.h>
-#include <asm/ide.h>
 #include <asm/setup.h>
 
 #define DRV_NAME "pata_gayle"
index 4013f28..d0aa8fc 100644 (file)
@@ -141,21 +141,15 @@ static int pata_imx_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       priv->clk = devm_clk_get(&pdev->dev, NULL);
+       priv->clk = devm_clk_get_enabled(&pdev->dev, NULL);
        if (IS_ERR(priv->clk)) {
-               dev_err(&pdev->dev, "Failed to get clock\n");
+               dev_err(&pdev->dev, "Failed to get and enable clock\n");
                return PTR_ERR(priv->clk);
        }
 
-       ret = clk_prepare_enable(priv->clk);
-       if (ret)
-               return ret;
-
        host = ata_host_alloc(&pdev->dev, 1);
-       if (!host) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       if (!host)
+               return -ENOMEM;
 
        host->private_data = priv;
        ap = host->ports[0];
@@ -164,12 +158,9 @@ static int pata_imx_probe(struct platform_device *pdev)
        ap->pio_mask = ATA_PIO4;
        ap->flags |= ATA_FLAG_SLAVE_POSS;
 
-       io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->host_regs = devm_ioremap_resource(&pdev->dev, io_res);
-       if (IS_ERR(priv->host_regs)) {
-               ret = PTR_ERR(priv->host_regs);
-               goto err;
-       }
+       priv->host_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &io_res);
+       if (IS_ERR(priv->host_regs))
+               return PTR_ERR(priv->host_regs);
 
        ap->ioaddr.cmd_addr = priv->host_regs + PATA_IMX_DRIVE_DATA;
        ap->ioaddr.ctl_addr = priv->host_regs + PATA_IMX_DRIVE_CONTROL;
@@ -195,16 +186,12 @@ static int pata_imx_probe(struct platform_device *pdev)
                                &pata_imx_sht);
 
        if (ret)
-               goto err;
+               return ret;
 
        return 0;
-err:
-       clk_disable_unprepare(priv->clk);
-
-       return ret;
 }
 
-static int pata_imx_remove(struct platform_device *pdev)
+static void pata_imx_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct pata_imx_priv *priv = host->private_data;
@@ -212,10 +199,6 @@ static int pata_imx_remove(struct platform_device *pdev)
        ata_host_detach(host);
 
        __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN);
-
-       clk_disable_unprepare(priv->clk);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -266,7 +249,7 @@ MODULE_DEVICE_TABLE(of, imx_pata_dt_ids);
 
 static struct platform_driver pata_imx_driver = {
        .probe          = pata_imx_probe,
-       .remove         = pata_imx_remove,
+       .remove_new     = pata_imx_remove,
        .driver = {
                .name           = DRV_NAME,
                .of_match_table = imx_pata_dt_ids,
index b1daa4d..246bb4f 100644 (file)
@@ -242,12 +242,6 @@ static int ixp4xx_pata_probe(struct platform_device *pdev)
        int ret;
        int irq;
 
-       cmd = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       ctl = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-
-       if (!cmd || !ctl)
-               return -EINVAL;
-
        ixpp = devm_kzalloc(dev, sizeof(*ixpp), GFP_KERNEL);
        if (!ixpp)
                return -ENOMEM;
@@ -271,18 +265,18 @@ static int ixp4xx_pata_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       ixpp->cmd = devm_ioremap_resource(dev, cmd);
-       ixpp->ctl = devm_ioremap_resource(dev, ctl);
-       if (IS_ERR(ixpp->cmd) || IS_ERR(ixpp->ctl))
-               return -ENOMEM;
+       ixpp->cmd = devm_platform_get_and_ioremap_resource(pdev, 0, &cmd);
+       if (IS_ERR(ixpp->cmd))
+               return PTR_ERR(ixpp->cmd);
+
+       ixpp->ctl = devm_platform_get_and_ioremap_resource(pdev, 1, &ctl);
+       if (IS_ERR(ixpp->ctl))
+               return PTR_ERR(ixpp->ctl);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq > 0)
-               irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
-       else if (irq < 0)
+       if (irq < 0)
                return irq;
-       else
-               return -EINVAL;
+       irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
 
        /* Just one port to set up */
        ixp4xx_setup_port(ixpp->host->ports[0], ixpp, cmd->start, ctl->start);
index 66c9dea..6c317a4 100644 (file)
 #include <linux/gfp.h>
 #include <linux/delay.h>
 #include <linux/libata.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 
 #include <asm/cacheflush.h>
@@ -800,8 +801,7 @@ static int mpc52xx_ata_probe(struct platform_device *op)
        return rv;
 }
 
-static int
-mpc52xx_ata_remove(struct platform_device *op)
+static void mpc52xx_ata_remove(struct platform_device *op)
 {
        struct ata_host *host = platform_get_drvdata(op);
        struct mpc52xx_ata_priv *priv = host->private_data;
@@ -815,8 +815,6 @@ mpc52xx_ata_remove(struct platform_device *op)
        irq_dispose_mapping(task_irq);
        bcom_ata_release(priv->dmatsk);
        irq_dispose_mapping(priv->ata_irq);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -857,7 +855,7 @@ static const struct of_device_id mpc52xx_ata_of_match[] = {
 
 static struct platform_driver mpc52xx_ata_of_platform_driver = {
        .probe          = mpc52xx_ata_probe,
-       .remove         = mpc52xx_ata_remove,
+       .remove_new     = mpc52xx_ata_remove,
 #ifdef CONFIG_PM_SLEEP
        .suspend        = mpc52xx_ata_suspend,
        .resume         = mpc52xx_ata_resume,
index ea402e0..5275c64 100644 (file)
@@ -295,7 +295,7 @@ static int pxa_ata_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int pxa_ata_remove(struct platform_device *pdev)
+static void pxa_ata_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct pata_pxa_data *data = host->ports[0]->private_data;
@@ -303,13 +303,11 @@ static int pxa_ata_remove(struct platform_device *pdev)
        dma_release_channel(data->dma_chan);
 
        ata_host_detach(host);
-
-       return 0;
 }
 
 static struct platform_driver pxa_ata_driver = {
        .probe          = pxa_ata_probe,
-       .remove         = pxa_ata_remove,
+       .remove_new     = pxa_ata_remove,
        .driver         = {
                .name           = DRV_NAME,
        },
index 3974d29..0fa253a 100644 (file)
@@ -155,18 +155,16 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int rb532_pata_driver_remove(struct platform_device *pdev)
+static void rb532_pata_driver_remove(struct platform_device *pdev)
 {
        struct ata_host *ah = platform_get_drvdata(pdev);
 
        ata_host_detach(ah);
-
-       return 0;
 }
 
 static struct platform_driver rb532_pata_platform_driver = {
        .probe          = rb532_pata_driver_probe,
-       .remove         = rb532_pata_driver_remove,
+       .remove_new     = rb532_pata_driver_remove,
        .driver  = {
                .name   = DRV_NAME,
        },
index 3b62ea4..93882e9 100644 (file)
@@ -180,8 +180,7 @@ static void sl82c105_bmdma_start(struct ata_queued_cmd *qc)
  *     document.
  *
  *     This function is also called to turn off DMA when a timeout occurs
- *     during DMA operation. In both cases we need to reset the engine,
- *     so no actual eng_timeout handler is required.
+ *     during DMA operation. In both cases we need to reset the engine.
  *
  *     We assume bmdma_stop is always called if bmdma_start as called. If
  *     not then we may need to wrap qc_issue.
index fabdd1e..52f5168 100644 (file)
@@ -18,9 +18,8 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/dmaengine.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
 #include <linux/libata.h>
@@ -1211,7 +1210,7 @@ error_out:
        return err;
 }
 
-static int sata_dwc_remove(struct platform_device *ofdev)
+static void sata_dwc_remove(struct platform_device *ofdev)
 {
        struct device *dev = &ofdev->dev;
        struct ata_host *host = dev_get_drvdata(dev);
@@ -1227,7 +1226,6 @@ static int sata_dwc_remove(struct platform_device *ofdev)
 #endif
 
        dev_dbg(dev, "done\n");
-       return 0;
 }
 
 static const struct of_device_id sata_dwc_match[] = {
@@ -1242,7 +1240,7 @@ static struct platform_driver sata_dwc_driver = {
                .of_match_table = sata_dwc_match,
        },
        .probe = sata_dwc_probe,
-       .remove = sata_dwc_remove,
+       .remove_new = sata_dwc_remove,
 };
 
 module_platform_driver(sata_dwc_driver);
index ccd99b9..01aa05f 100644 (file)
@@ -12,6 +12,9 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -19,9 +22,6 @@
 #include <scsi/scsi_cmnd.h>
 #include <linux/libata.h>
 #include <asm/io.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
 
 static unsigned int intr_coalescing_count;
 module_param(intr_coalescing_count, int, S_IRUGO);
@@ -1526,7 +1526,7 @@ error_exit_with_cleanup:
        return retval;
 }
 
-static int sata_fsl_remove(struct platform_device *ofdev)
+static void sata_fsl_remove(struct platform_device *ofdev)
 {
        struct ata_host *host = platform_get_drvdata(ofdev);
        struct sata_fsl_host_priv *host_priv = host->private_data;
@@ -1535,8 +1535,6 @@ static int sata_fsl_remove(struct platform_device *ofdev)
        device_remove_file(&ofdev->dev, &host_priv->rx_watermark);
 
        ata_host_detach(host);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1591,7 +1589,7 @@ static struct platform_driver fsl_sata_driver = {
                .of_match_table = fsl_sata_match,
        },
        .probe          = sata_fsl_probe,
-       .remove         = sata_fsl_remove,
+       .remove_new     = sata_fsl_remove,
 #ifdef CONFIG_PM_SLEEP
        .suspend        = sata_fsl_suspend,
        .resume         = sata_fsl_resume,
index c42cc9b..400b22e 100644 (file)
@@ -12,8 +12,7 @@
 #include <linux/regmap.h>
 #include <linux/delay.h>
 #include <linux/reset.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/pinctrl/consumer.h>
@@ -400,7 +399,7 @@ out_unprep_clk:
        return ret;
 }
 
-static int gemini_sata_remove(struct platform_device *pdev)
+static void gemini_sata_remove(struct platform_device *pdev)
 {
        struct sata_gemini *sg = platform_get_drvdata(pdev);
 
@@ -409,8 +408,6 @@ static int gemini_sata_remove(struct platform_device *pdev)
                clk_unprepare(sg->sata0_pclk);
        }
        sg_singleton = NULL;
-
-       return 0;
 }
 
 static const struct of_device_id gemini_sata_of_match[] = {
@@ -424,10 +421,11 @@ static struct platform_driver gemini_sata_driver = {
                .of_match_table = gemini_sata_of_match,
        },
        .probe = gemini_sata_probe,
-       .remove = gemini_sata_remove,
+       .remove_new = gemini_sata_remove,
 };
 module_platform_driver(gemini_sata_driver);
 
+MODULE_DESCRIPTION("low level driver for Cortina Systems Gemini SATA bridge");
 MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" DRV_NAME);
index d6b324d..63ef7bb 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 #include <linux/spinlock.h>
 #include <linux/device.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/libata.h>
@@ -385,7 +385,7 @@ static int highbank_initialize_phys(struct device *dev, void __iomem *addr)
 static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class,
                                unsigned long deadline)
 {
-       static const unsigned long timing[] = { 5, 100, 500};
+       static const unsigned int timing[] = { 5, 100, 500};
        struct ata_port *ap = link->ap;
        struct ahci_port_priv *pp = ap->private_data;
        struct ahci_host_priv *hpriv = ap->host->private_data;
index 2c8c78e..db9c255 100644 (file)
@@ -619,7 +619,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
        struct ata_port *ap = link->ap;
        void __iomem *port_base = inic_port_base(ap);
        void __iomem *idma_ctl = port_base + PORT_IDMA_CTL;
-       const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+       const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
        int rc;
 
        /* hammer it into sane state */
index d404e63..d105db5 100644 (file)
@@ -3633,7 +3633,7 @@ static int mv_hardreset(struct ata_link *link, unsigned int *class,
 
        /* Workaround for errata FEr SATA#10 (part 2) */
        do {
-               const unsigned long *timing =
+               const unsigned int *timing =
                                sata_ehc_deb_timing(&link->eh_context);
 
                rc = sata_link_hardreset(link, timing, deadline + extra,
@@ -4210,7 +4210,7 @@ err:
  *      A platform bus SATA device has been unplugged. Perform the needed
  *      cleanup. Also called on module unload for any active devices.
  */
-static int mv_platform_remove(struct platform_device *pdev)
+static void mv_platform_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct mv_host_priv *hpriv = host->private_data;
@@ -4228,7 +4228,6 @@ static int mv_platform_remove(struct platform_device *pdev)
                }
                phy_power_off(hpriv->port_phys[port]);
        }
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -4284,7 +4283,7 @@ MODULE_DEVICE_TABLE(of, mv_sata_dt_ids);
 
 static struct platform_driver mv_platform_driver = {
        .probe          = mv_platform_probe,
-       .remove         = mv_platform_remove,
+       .remove_new     = mv_platform_remove,
        .suspend        = mv_platform_suspend,
        .resume         = mv_platform_resume,
        .driver         = {
index abf5651..0a0cee7 100644 (file)
@@ -1529,7 +1529,7 @@ static int nv_hardreset(struct ata_link *link, unsigned int *class,
                sata_link_hardreset(link, sata_deb_timing_hotplug, deadline,
                                    NULL, NULL);
        else {
-               const unsigned long *timing = sata_ehc_deb_timing(ehc);
+               const unsigned int *timing = sata_ehc_deb_timing(ehc);
                int rc;
 
                if (!(ehc->i.flags & ATA_EHI_QUIET))
index 34790f1..c1469d0 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/module.h>
 #include <linux/ata.h>
 #include <linux/libata.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/err.h>
@@ -861,15 +861,11 @@ static int sata_rcar_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct ata_host *host;
        struct sata_rcar_priv *priv;
-       struct resource *mem;
-       int irq;
-       int ret = 0;
+       int irq, ret;
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;
-       if (!irq)
-               return -EINVAL;
 
        priv = devm_kzalloc(dev, sizeof(struct sata_rcar_priv), GFP_KERNEL);
        if (!priv)
@@ -890,8 +886,7 @@ static int sata_rcar_probe(struct platform_device *pdev)
 
        host->private_data = priv;
 
-       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       priv->base = devm_ioremap_resource(dev, mem);
+       priv->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(priv->base)) {
                ret = PTR_ERR(priv->base);
                goto err_pm_put;
@@ -914,7 +909,7 @@ err_pm_put:
        return ret;
 }
 
-static int sata_rcar_remove(struct platform_device *pdev)
+static void sata_rcar_remove(struct platform_device *pdev)
 {
        struct ata_host *host = platform_get_drvdata(pdev);
        struct sata_rcar_priv *priv = host->private_data;
@@ -930,8 +925,6 @@ static int sata_rcar_remove(struct platform_device *pdev)
 
        pm_runtime_put(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -1016,7 +1009,7 @@ static const struct dev_pm_ops sata_rcar_pm_ops = {
 
 static struct platform_driver sata_rcar_driver = {
        .probe          = sata_rcar_probe,
-       .remove         = sata_rcar_remove,
+       .remove_new     = sata_rcar_remove,
        .driver = {
                .name           = DRV_NAME,
                .of_match_table = sata_rcar_match,
index e72a025..142e70b 100644 (file)
@@ -597,7 +597,7 @@ static int sil24_init_port(struct ata_port *ap)
 static int sil24_exec_polled_cmd(struct ata_port *ap, int pmp,
                                 const struct ata_taskfile *tf,
                                 int is_cmd, u32 ctrl,
-                                unsigned long timeout_msec)
+                                unsigned int timeout_msec)
 {
        void __iomem *port = sil24_port_base(ap);
        struct sil24_port_priv *pp = ap->private_data;
@@ -651,7 +651,7 @@ static int sil24_softreset(struct ata_link *link, unsigned int *class,
 {
        struct ata_port *ap = link->ap;
        int pmp = sata_srst_pmp(link);
-       unsigned long timeout_msec = 0;
+       unsigned int timeout_msec = 0;
        struct ata_taskfile tf;
        const char *reason;
        int rc;
index ccc0160..b51d7a9 100644 (file)
@@ -232,7 +232,6 @@ static const struct scsi_host_template pdc_sata_sht = {
        .dma_boundary           = ATA_DMA_BOUNDARY,
 };
 
-/* TODO: inherit from base port_ops after converting to new EH */
 static struct ata_port_operations pdc_20621_ops = {
        .inherits               = &ata_sff_port_ops,
 
index 79ab532..6bc8610 100644 (file)
@@ -1557,7 +1557,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
        do {
                int sent;
 
-               bvec_set_page(&bvec, page, offset, len);
+               bvec_set_page(&bvec, page, len, offset);
                iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
 
                sent = sock_sendmsg(socket, &msg);
index 8640130..9680909 100644 (file)
@@ -1643,9 +1643,12 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
        struct nullb_queue *nq = hctx->driver_data;
        LIST_HEAD(list);
        int nr = 0;
+       struct request *rq;
 
        spin_lock(&nq->poll_lock);
        list_splice_init(&nq->poll_list, &list);
+       list_for_each_entry(rq, &list, queuelist)
+               blk_mq_set_request_complete(rq);
        spin_unlock(&nq->poll_lock);
 
        while (!list_empty(&list)) {
@@ -1671,16 +1674,21 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
        struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
        struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-       pr_info("rq %p timed out\n", rq);
-
        if (hctx->type == HCTX_TYPE_POLL) {
                struct nullb_queue *nq = hctx->driver_data;
 
                spin_lock(&nq->poll_lock);
+               /* The request may have completed meanwhile. */
+               if (blk_mq_request_completed(rq)) {
+                       spin_unlock(&nq->poll_lock);
+                       return BLK_EH_DONE;
+               }
                list_del_init(&rq->queuelist);
                spin_unlock(&nq->poll_lock);
        }
 
+       pr_info("rq %p timed out\n", rq);
+
        /*
         * If the device is marked as blocking (i.e. memory backed or zoned
         * device), the submission path may be blocked waiting for resources
index 2328cc0..3de11f0 100644 (file)
@@ -7199,7 +7199,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
 static ssize_t do_rbd_remove(const char *buf, size_t count)
 {
        struct rbd_device *rbd_dev = NULL;
-       struct list_head *tmp;
        int dev_id;
        char opt_buf[6];
        bool force = false;
@@ -7226,8 +7225,7 @@ static ssize_t do_rbd_remove(const char *buf, size_t count)
 
        ret = -ENOENT;
        spin_lock(&rbd_dev_list_lock);
-       list_for_each(tmp, &rbd_dev_list) {
-               rbd_dev = list_entry(tmp, struct rbd_device, node);
+       list_for_each_entry(rbd_dev, &rbd_dev_list, node) {
                if (rbd_dev->dev_id == dev_id) {
                        ret = 0;
                        break;
diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig
new file mode 100644 (file)
index 0000000..a57677f
--- /dev/null
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+menu "Cache Drivers"
+
+config AX45MP_L2_CACHE
+       bool "Andes Technology AX45MP L2 Cache controller"
+       depends on RISCV_DMA_NONCOHERENT
+       select RISCV_NONSTANDARD_CACHE_OPS
+       help
+         Support for the L2 cache controller on Andes Technology AX45MP platforms.
+
+endmenu
diff --git a/drivers/cache/Makefile b/drivers/cache/Makefile
new file mode 100644 (file)
index 0000000..2012e7f
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o
diff --git a/drivers/cache/ax45mp_cache.c b/drivers/cache/ax45mp_cache.c
new file mode 100644 (file)
index 0000000..57186c5
--- /dev/null
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * non-coherent cache functions for Andes AX45MP
+ *
+ * Copyright (C) 2023 Renesas Electronics Corp.
+ */
+
+#include <linux/cacheflush.h>
+#include <linux/cacheinfo.h>
+#include <linux/dma-direction.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include <asm/dma-noncoherent.h>
+
+/* L2 cache registers */
+#define AX45MP_L2C_REG_CTL_OFFSET              0x8
+
+#define AX45MP_L2C_REG_C0_CMD_OFFSET           0x40
+#define AX45MP_L2C_REG_C0_ACC_OFFSET           0x48
+#define AX45MP_L2C_REG_STATUS_OFFSET           0x80
+
+/* D-cache operation */
+#define AX45MP_CCTL_L1D_VA_INVAL               0 /* Invalidate an L1 cache entry */
+#define AX45MP_CCTL_L1D_VA_WB                  1 /* Write-back an L1 cache entry */
+
+/* L2 CCTL status */
+#define AX45MP_CCTL_L2_STATUS_IDLE             0
+
+/* L2 CCTL status cores mask */
+#define AX45MP_CCTL_L2_STATUS_C0_MASK          0xf
+
+/* L2 cache operation */
+#define AX45MP_CCTL_L2_PA_INVAL                        0x8 /* Invalidate an L2 cache entry */
+#define AX45MP_CCTL_L2_PA_WB                   0x9 /* Write-back an L2 cache entry */
+
+#define AX45MP_L2C_REG_PER_CORE_OFFSET         0x10
+#define AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET  4
+
+#define AX45MP_L2C_REG_CN_CMD_OFFSET(n)        \
+       (AX45MP_L2C_REG_C0_CMD_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET))
+#define AX45MP_L2C_REG_CN_ACC_OFFSET(n)        \
+       (AX45MP_L2C_REG_C0_ACC_OFFSET + ((n) * AX45MP_L2C_REG_PER_CORE_OFFSET))
+#define AX45MP_CCTL_L2_STATUS_CN_MASK(n)       \
+       (AX45MP_CCTL_L2_STATUS_C0_MASK << ((n) * AX45MP_CCTL_L2_STATUS_PER_CORE_OFFSET))
+
+#define AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM     0x80b
+#define AX45MP_CCTL_REG_UCCTLCOMMAND_NUM       0x80c
+
+#define AX45MP_CACHE_LINE_SIZE                 64
+
+/* Driver state shared by the cache maintenance callbacks in this file. */
+struct ax45mp_priv {
+       /* L2 controller register window; assigned from ioremap() in ax45mp_cache_init() */
+       void __iomem *l2c_base;
+       /* Value of the "cache-line-size" device tree property, in bytes */
+       u32 ax45mp_cache_line_size;
+};
+
+/* Single file-scope instance used by every helper below. */
+static struct ax45mp_priv ax45mp_priv;
+
+/* L2 Cache operations */
+
+/* Read the raw 32-bit L2 CCTL status register from the mapped controller. */
+static inline uint32_t ax45mp_cpu_l2c_get_cctl_status(void)
+{
+       void __iomem *status_reg = ax45mp_priv.l2c_base + AX45MP_L2C_REG_STATUS_OFFSET;
+
+       return readl(status_reg);
+}
+
+/*
+ * Apply one L1 command and one L2 command to every cache line in
+ * [start, end).
+ *
+ * @start: virtual address of the first line; advanced by the cache line size
+ *         after each iteration
+ * @end:   virtual address at which the walk stops (exclusive)
+ * @l1_op: command written to the UCCTLCOMMAND CSR
+ * @l2_op: command written to the calling core's L2 command register
+ *
+ * The callers in this file invoke this with local interrupts disabled.
+ */
+static void ax45mp_cpu_cache_operation(unsigned long start, unsigned long end,
+                                      unsigned int l1_op, unsigned int l2_op)
+{
+       unsigned long line_size = ax45mp_priv.ax45mp_cache_line_size;
+       void __iomem *base = ax45mp_priv.l2c_base;
+       /*
+        * NOTE(review): the logical CPU number is used to select the per-core
+        * L2 registers and status field; presumably it matches the hart id on
+        * the supported SoCs - confirm.
+        */
+       int mhartid = smp_processor_id();
+       unsigned long pa;
+
+       while (end > start) {
+               /* L1: select the line by virtual address, then issue the command. */
+               csr_write(AX45MP_CCTL_REG_UCCTLBEGINADDR_NUM, start);
+               csr_write(AX45MP_CCTL_REG_UCCTLCOMMAND_NUM, l1_op);
+
+               /*
+                * L2: write the line's physical address to this core's access
+                * register, then the command to this core's command register.
+                */
+               pa = virt_to_phys((void *)start);
+               writel(pa, base + AX45MP_L2C_REG_CN_ACC_OFFSET(mhartid));
+               writel(l2_op, base + AX45MP_L2C_REG_CN_CMD_OFFSET(mhartid));
+               /* Busy-wait until this core's field of the status register reads idle. */
+               while ((ax45mp_cpu_l2c_get_cctl_status() &
+                       AX45MP_CCTL_L2_STATUS_CN_MASK(mhartid)) !=
+                       AX45MP_CCTL_L2_STATUS_IDLE)
+                       ;
+
+               start += line_size;
+       }
+}
+
+/*
+ * Write back every L1 and L2 cache line in [start, end).
+ * @start and @end are virtual addresses; the walk steps by the cache line
+ * size starting at @start.
+ */
+static inline void ax45mp_cpu_dcache_wb_range(unsigned long start, unsigned long end)
+{
+       ax45mp_cpu_cache_operation(start, end, AX45MP_CCTL_L1D_VA_WB,
+                                  AX45MP_CCTL_L2_PA_WB);
+}
+
+/*
+ * Invalidate every L1 and L2 cache line in [start, end).
+ * @start and @end are virtual addresses; the walk steps by the cache line
+ * size starting at @start.
+ */
+static inline void ax45mp_cpu_dcache_inval_range(unsigned long start, unsigned long end)
+{
+       ax45mp_cpu_cache_operation(start, end, AX45MP_CCTL_L1D_VA_INVAL,
+                                  AX45MP_CCTL_L2_PA_INVAL);
+}
+
+/*
+ * Invalidate the L1 and L2 cache lines backing a physical range.
+ *
+ * @paddr: physical start address of the range
+ * @size:  length of the range in bytes; an empty range is a no-op
+ *
+ * The range is widened to whole cache lines (start rounded down, end rounded
+ * up) and the per-line walk runs with local interrupts disabled.
+ */
+static void ax45mp_dma_cache_inv(phys_addr_t paddr, size_t size)
+{
+       unsigned long first = (unsigned long)phys_to_virt(paddr);
+       unsigned long last = first + size;
+       unsigned long line_size;
+       unsigned long line_mask;
+       unsigned long irq_state;
+
+       if (unlikely(last == first))
+               return;
+
+       line_size = ax45mp_priv.ax45mp_cache_line_size;
+       /* Mask that clears the offset-within-line bits. */
+       line_mask = ~(line_size - 1);
+
+       first &= line_mask;
+       last = (last + line_size - 1) & line_mask;
+
+       local_irq_save(irq_state);
+       ax45mp_cpu_dcache_inval_range(first, last);
+       local_irq_restore(irq_state);
+}
+
+/*
+ * Write back the L1 and L2 cache lines backing a physical range.
+ *
+ * @paddr: physical start address of the range
+ * @size:  length of the range in bytes; an empty range is a no-op
+ *
+ * The range is widened to whole cache lines (start rounded down, end rounded
+ * up), matching ax45mp_dma_cache_inv(), and the per-line walk runs with
+ * local interrupts disabled.
+ */
+static void ax45mp_dma_cache_wback(phys_addr_t paddr, size_t size)
+{
+       unsigned long start = (unsigned long)phys_to_virt(paddr);
+       unsigned long end = start + size;
+       unsigned long line_size;
+       unsigned long flags;
+
+       /*
+        * Nothing to write back. Without this guard an unaligned @paddr with
+        * a zero @size would still write back the line containing @paddr.
+        */
+       if (unlikely(start == end))
+               return;
+
+       line_size = ax45mp_priv.ax45mp_cache_line_size;
+
+       start = start & (~(line_size - 1));
+       end = ((end + line_size - 1) & (~(line_size - 1)));
+
+       local_irq_save(flags);
+
+       ax45mp_cpu_dcache_wb_range(start, end);
+
+       local_irq_restore(flags);
+}
+
+/*
+ * Write back and then invalidate the cache lines backing a physical range.
+ * The write-back is issued first, followed by the invalidate, each over the
+ * same @paddr/@size.
+ */
+static void ax45mp_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
+{
+       ax45mp_dma_cache_wback(paddr, size);
+       ax45mp_dma_cache_inv(paddr, size);
+}
+
+/*
+ * Read and validate the L2 cache line size from the device tree.
+ *
+ * @np: cache controller node carrying the "cache-line-size" property
+ *
+ * The value is stored in ax45mp_priv.ax45mp_cache_line_size.
+ *
+ * Return: 0 on success, the of_property_read_u32() error if the property
+ * cannot be read, or -EINVAL if the value is not AX45MP_CACHE_LINE_SIZE.
+ */
+static int ax45mp_get_l2_line_size(struct device_node *np)
+{
+       int ret;
+
+       ret = of_property_read_u32(np, "cache-line-size", &ax45mp_priv.ax45mp_cache_line_size);
+       if (ret) {
+               /* No default is applied: the error is returned to the caller. */
+               pr_err("Failed to get cache-line-size\n");
+               return ret;
+       }
+
+       if (ax45mp_priv.ax45mp_cache_line_size != AX45MP_CACHE_LINE_SIZE) {
+               pr_err("Expected cache-line-size to be 64 bytes (found:%u)\n",
+                      ax45mp_priv.ax45mp_cache_line_size);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Cache maintenance callbacks handed to
+ * riscv_noncoherent_register_cache_ops() by ax45mp_cache_init().
+ *
+ * NOTE(review): the table lives in init data, so the registration call
+ * presumably copies it rather than keeping the pointer - confirm.
+ */
+static const struct riscv_nonstd_cache_ops ax45mp_cmo_ops __initdata = {
+       .wback = &ax45mp_dma_cache_wback,
+       .inv = &ax45mp_dma_cache_inv,
+       .wback_inv = &ax45mp_dma_cache_wback_inv,
+};
+
+/* Device tree match table used by ax45mp_cache_init() to find the L2 cache node. */
+static const struct of_device_id ax45mp_cache_ids[] = {
+       { .compatible = "andestech,ax45mp-cache" },
+       { /* sentinel */ }
+};
+
+/*
+ * Locate the AX45MP L2 cache controller in the device tree, map its
+ * registers and register the software cache maintenance callbacks.
+ *
+ * Return: 0 on success, or when riscv_cbom_block_size is 0 and nothing is
+ * registered; -ENODEV if no available matching node exists; -ENOMEM if the
+ * registers cannot be mapped; otherwise the error returned by
+ * of_address_to_resource() or ax45mp_get_l2_line_size().
+ */
+static int __init ax45mp_cache_init(void)
+{
+       struct device_node *np;
+       struct resource res;
+       int ret;
+
+       /* of_find_matching_node() returns the node with a reference held. */
+       np = of_find_matching_node(NULL, ax45mp_cache_ids);
+       if (!of_device_is_available(np)) {
+               ret = -ENODEV;
+               goto out_put_node;
+       }
+
+       ret = of_address_to_resource(np, 0, &res);
+       if (ret)
+               goto out_put_node;
+
+       /*
+        * If IOCP is present on the Andes AX45MP core riscv_cbom_block_size
+        * will be 0 for sure, so we can definitely rely on it. If
+        * riscv_cbom_block_size = 0 we don't need to handle CMO using SW any
+        * more so we just return success here and only if its being set we
+        * continue further in the probe path.
+        */
+       if (!riscv_cbom_block_size)
+               goto out_put_node;      /* ret is 0 at this point */
+
+       ax45mp_priv.l2c_base = ioremap(res.start, resource_size(&res));
+       if (!ax45mp_priv.l2c_base) {
+               ret = -ENOMEM;
+               goto out_put_node;
+       }
+
+       ret = ax45mp_get_l2_line_size(np);
+       if (ret) {
+               iounmap(ax45mp_priv.l2c_base);
+               goto out_put_node;
+       }
+
+       riscv_noncoherent_register_cache_ops(&ax45mp_cmo_ops);
+
+out_put_node:
+       /*
+        * The node is not referenced after init, so drop the reference on
+        * every path. of_node_put() tolerates a NULL node.
+        */
+       of_node_put(np);
+       return ret;
+}
+early_initcall(ax45mp_cache_init);
index 9eb1a18..ea085b1 100644 (file)
@@ -463,28 +463,6 @@ static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
        return (cancel & CRB_CANCEL_INVOKE) == CRB_CANCEL_INVOKE;
 }
 
-static int crb_check_flags(struct tpm_chip *chip)
-{
-       u32 val;
-       int ret;
-
-       ret = crb_request_locality(chip, 0);
-       if (ret)
-               return ret;
-
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val, NULL);
-       if (ret)
-               goto release;
-
-       if (val == 0x414D4400U /* AMD */)
-               chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
-
-release:
-       crb_relinquish_locality(chip, 0);
-
-       return ret;
-}
-
 static const struct tpm_class_ops tpm_crb = {
        .flags = TPM_OPS_AUTO_STARTUP,
        .status = crb_status,
@@ -797,12 +775,13 @@ static int crb_acpi_add(struct acpi_device *device)
                                FW_BUG "TPM2 ACPI table has wrong size %u for start method type %d\n",
                                buf->header.length,
                                ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON);
-                       return -EINVAL;
+                       rc = -EINVAL;
+                       goto out;
                }
                crb_pluton = ACPI_ADD_PTR(struct tpm2_crb_pluton, buf, sizeof(*buf));
                rc = crb_map_pluton(dev, priv, buf, crb_pluton);
                if (rc)
-                       return rc;
+                       goto out;
        }
 
        priv->sm = sm;
@@ -826,9 +805,14 @@ static int crb_acpi_add(struct acpi_device *device)
        if (rc)
                goto out;
 
-       rc = crb_check_flags(chip);
-       if (rc)
-               goto out;
+#ifdef CONFIG_X86
+       /* A quirk for https://www.amd.com/en/support/kb/faq/pa-410 */
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+           priv->sm != ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) {
+               dev_info(dev, "Disabling hwrng\n");
+               chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
+       }
+#endif /* CONFIG_X86 */
 
        rc = tpm_chip_register(chip);
 
index 62962ae..497bc05 100644 (file)
@@ -92,7 +92,7 @@ config MICROCHIP_TCB_CAPTURE
 
 config RZ_MTU3_CNT
        tristate "Renesas RZ/G2L MTU3a counter driver"
-       depends on RZ_MTU3 || COMPILE_TEST
+       depends on RZ_MTU3
        help
          Enable support for MTU3a counter driver found on Renesas RZ/G2L alike
          SoCs. This IP supports both 16-bit and 32-bit phase counting mode
index a757f90..60ed890 100644 (file)
@@ -86,6 +86,7 @@ static void cpufreq_governor_limits(struct cpufreq_policy *policy);
 static int cpufreq_set_policy(struct cpufreq_policy *policy,
                              struct cpufreq_governor *new_gov,
                              unsigned int new_pol);
+static bool cpufreq_boost_supported(void);
 
 /*
  * Two notifier lists: the "policy" list is involved in the
@@ -455,8 +456,10 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
                            policy->cur,
                            policy->cpuinfo.max_freq);
 
+       spin_lock(&policy->transition_lock);
        policy->transition_ongoing = false;
        policy->transition_task = NULL;
+       spin_unlock(&policy->transition_lock);
 
        wake_up(&policy->transition_wait);
 }
@@ -621,6 +624,40 @@ static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr,
 }
 define_one_global_rw(boost);
 
+/*
+ * Show handler for the per-policy "boost" attribute: emits
+ * policy->boost_enabled as a decimal number followed by a newline.
+ */
+static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf)
+{
+       return sysfs_emit(buf, "%d\n", policy->boost_enabled);
+}
+
+/*
+ * Store handler for the per-policy "boost" attribute.
+ *
+ * @policy: policy whose boost state is being changed
+ * @buf:    user input; must parse as the decimal integer 0 or 1
+ * @count:  number of bytes written, returned on success
+ *
+ * Return: @count on success (including when the policy is already in the
+ * requested state), -EINVAL for unparsable or out-of-range input or when the
+ * driver has boost disabled, otherwise the error from the driver's
+ * set_boost() callback.
+ */
+static ssize_t store_local_boost(struct cpufreq_policy *policy,
+                                const char *buf, size_t count)
+{
+       int enable;
+       int err;
+
+       if (kstrtoint(buf, 10, &enable))
+               return -EINVAL;
+
+       if (enable != 0 && enable != 1)
+               return -EINVAL;
+
+       if (!cpufreq_driver->boost_enabled)
+               return -EINVAL;
+
+       /* Already in the requested state: accept without calling the driver. */
+       if (policy->boost_enabled == enable)
+               return count;
+
+       /* The driver callback runs under the CPU hotplug read lock. */
+       cpus_read_lock();
+       err = cpufreq_driver->set_boost(policy, enable);
+       cpus_read_unlock();
+
+       if (err)
+               return err;
+
+       /* Record the new state only after the driver accepted it. */
+       policy->boost_enabled = enable;
+
+       return count;
+}
+
+static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost);
+
 static struct cpufreq_governor *find_governor(const char *str_governor)
 {
        struct cpufreq_governor *t;
@@ -1055,6 +1092,12 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
                        return ret;
        }
 
+       if (cpufreq_boost_supported()) {
+               ret = sysfs_create_file(&policy->kobj, &local_boost.attr);
+               if (ret)
+                       return ret;
+       }
+
        return 0;
 }
 
@@ -1943,16 +1986,16 @@ void cpufreq_resume(void)
 
        for_each_active_policy(policy) {
                if (cpufreq_driver->resume && cpufreq_driver->resume(policy)) {
-                       pr_err("%s: Failed to resume driver: %p\n", __func__,
-                               policy);
+                       pr_err("%s: Failed to resume driver: %s\n", __func__,
+                               cpufreq_driver->name);
                } else if (has_target()) {
                        down_write(&policy->rwsem);
                        ret = cpufreq_start_governor(policy);
                        up_write(&policy->rwsem);
 
                        if (ret)
-                               pr_err("%s: Failed to start governor for policy: %p\n",
-                                      __func__, policy);
+                               pr_err("%s: Failed to start governor for CPU%u's policy\n",
+                                      __func__, policy->cpu);
                }
        }
 }
@@ -2716,6 +2759,8 @@ int cpufreq_boost_trigger_state(int state)
                ret = cpufreq_driver->set_boost(policy, state);
                if (ret)
                        goto err_reset_state;
+
+               policy->boost_enabled = state;
        }
        cpus_read_unlock();
 
index 85da677..af44ee6 100644 (file)
@@ -439,7 +439,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
 
        ret = gov->init(dbs_data);
        if (ret)
-               goto free_policy_dbs_info;
+               goto free_dbs_data;
 
        /*
         * The sampling interval should not be less than the transition latency
@@ -474,6 +474,8 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
        if (!have_governor_per_policy())
                gov->gdbs_data = NULL;
        gov->exit(dbs_data);
+
+free_dbs_data:
        kfree(dbs_data);
 
 free_policy_dbs_info:
index 84fe37d..6f8b5ea 100644 (file)
@@ -232,8 +232,8 @@ static int pcc_cpufreq_target(struct cpufreq_policy *policy,
        status = ioread16(&pcch_hdr->status);
        iowrite16(0, &pcch_hdr->status);
 
-       cpufreq_freq_transition_end(policy, &freqs, status != CMD_COMPLETE);
        spin_unlock(&pcc_lock);
+       cpufreq_freq_transition_end(policy, &freqs, status != CMD_COMPLETE);
 
        if (status != CMD_COMPLETE) {
                pr_debug("target: FAILED for cpu %d, with status: 0x%x\n",
index 92389a5..a1157c2 100644 (file)
@@ -86,10 +86,10 @@ lib-$(CONFIG_EFI_GENERIC_STUB)      += efi-stub.o string.o intrinsics.o systable.o \
                                   screen_info.o efi-stub-entry.o
 
 lib-$(CONFIG_ARM)              += arm32-stub.o
-lib-$(CONFIG_ARM64)            += arm64.o arm64-stub.o smbios.o
+lib-$(CONFIG_ARM64)            += kaslr.o arm64.o arm64-stub.o smbios.o
 lib-$(CONFIG_X86)              += x86-stub.o
 lib-$(CONFIG_X86_64)           += x86-5lvl.o
-lib-$(CONFIG_RISCV)            += riscv.o riscv-stub.o
+lib-$(CONFIG_RISCV)            += kaslr.o riscv.o riscv-stub.o
 lib-$(CONFIG_LOONGARCH)                += loongarch.o loongarch-stub.o
 
 CFLAGS_arm32-stub.o            := -DTEXT_OFFSET=$(TEXT_OFFSET)
index 8c40fc8..452b7cc 100644 (file)
 
 #include "efistub.h"
 
-/*
- * Distro versions of GRUB may ignore the BSS allocation entirely (i.e., fail
- * to provide space, and fail to zero it). Check for this condition by double
- * checking that the first and the last byte of the image are covered by the
- * same EFI memory map entry.
- */
-static bool check_image_region(u64 base, u64 size)
-{
-       struct efi_boot_memmap *map;
-       efi_status_t status;
-       bool ret = false;
-       int map_offset;
-
-       status = efi_get_memory_map(&map, false);
-       if (status != EFI_SUCCESS)
-               return false;
-
-       for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) {
-               efi_memory_desc_t *md = (void *)map->map + map_offset;
-               u64 end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE;
-
-               /*
-                * Find the region that covers base, and return whether
-                * it covers base+size bytes.
-                */
-               if (base >= md->phys_addr && base < end) {
-                       ret = (base + size) <= end;
-                       break;
-               }
-       }
-
-       efi_bs_call(free_pool, map);
-
-       return ret;
-}
-
 efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 unsigned long *image_size,
                                 unsigned long *reserve_addr,
@@ -59,31 +23,6 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
 {
        efi_status_t status;
        unsigned long kernel_size, kernel_codesize, kernel_memsize;
-       u32 phys_seed = 0;
-       u64 min_kimg_align = efi_get_kimg_min_align();
-
-       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
-               efi_guid_t li_fixed_proto = LINUX_EFI_LOADED_IMAGE_FIXED_GUID;
-               void *p;
-
-               if (efi_nokaslr) {
-                       efi_info("KASLR disabled on kernel command line\n");
-               } else if (efi_bs_call(handle_protocol, image_handle,
-                                      &li_fixed_proto, &p) == EFI_SUCCESS) {
-                       efi_info("Image placement fixed by loader\n");
-               } else {
-                       status = efi_get_random_bytes(sizeof(phys_seed),
-                                                     (u8 *)&phys_seed);
-                       if (status == EFI_NOT_FOUND) {
-                               efi_info("EFI_RNG_PROTOCOL unavailable\n");
-                               efi_nokaslr = true;
-                       } else if (status != EFI_SUCCESS) {
-                               efi_err("efi_get_random_bytes() failed (0x%lx)\n",
-                                       status);
-                               efi_nokaslr = true;
-                       }
-               }
-       }
 
        if (image->image_base != _text) {
                efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
@@ -98,50 +37,15 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
        kernel_codesize = __inittext_end - _text;
        kernel_memsize = kernel_size + (_end - _edata);
        *reserve_size = kernel_memsize;
+       *image_addr = (unsigned long)_text;
 
-       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
-               /*
-                * If KASLR is enabled, and we have some randomness available,
-                * locate the kernel at a randomized offset in physical memory.
-                */
-               status = efi_random_alloc(*reserve_size, min_kimg_align,
-                                         reserve_addr, phys_seed,
-                                         EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
-               if (status != EFI_SUCCESS)
-                       efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
-       } else {
-               status = EFI_OUT_OF_RESOURCES;
-       }
-
-       if (status != EFI_SUCCESS) {
-               if (!check_image_region((u64)_text, kernel_memsize)) {
-                       efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n");
-               } else if (IS_ALIGNED((u64)_text, min_kimg_align) &&
-                          (u64)_end < EFI_ALLOC_LIMIT) {
-                       /*
-                        * Just execute from wherever we were loaded by the
-                        * UEFI PE/COFF loader if the placement is suitable.
-                        */
-                       *image_addr = (u64)_text;
-                       *reserve_size = 0;
-                       return EFI_SUCCESS;
-               }
-
-               status = efi_allocate_pages_aligned(*reserve_size, reserve_addr,
-                                                   ULONG_MAX, min_kimg_align,
-                                                   EFI_LOADER_CODE);
-
-               if (status != EFI_SUCCESS) {
-                       efi_err("Failed to relocate kernel\n");
-                       *reserve_size = 0;
-                       return status;
-               }
-       }
-
-       *image_addr = *reserve_addr;
-       memcpy((void *)*image_addr, _text, kernel_size);
-       caches_clean_inval_pou(*image_addr, *image_addr + kernel_codesize);
-       efi_remap_image(*image_addr, *reserve_size, kernel_codesize);
+       status = efi_kaslr_relocate_kernel(image_addr,
+                                          reserve_addr, reserve_size,
+                                          kernel_size, kernel_codesize,
+                                          kernel_memsize,
+                                          efi_kaslr_get_phys_seed(image_handle));
+       if (status != EFI_SUCCESS)
+               return status;
 
        return EFI_SUCCESS;
 }
@@ -159,3 +63,8 @@ unsigned long primary_entry_offset(void)
         */
        return (char *)primary_entry - _text;
 }
+
+/*
+ * Architecture hook for instruction cache maintenance over [start, end):
+ * this implementation forwards the range to caches_clean_inval_pou().
+ */
+void efi_icache_sync(unsigned long start, unsigned long end)
+{
+       caches_clean_inval_pou(start, end);
+}
index 9823f6f..212687c 100644 (file)
@@ -1133,6 +1133,14 @@ const u8 *__efi_get_smbios_string(const struct efi_smbios_record *record,
 
 void efi_remap_image(unsigned long image_base, unsigned alloc_size,
                     unsigned long code_size);
+efi_status_t efi_kaslr_relocate_kernel(unsigned long *image_addr,
+                                      unsigned long *reserve_addr,
+                                      unsigned long *reserve_size,
+                                      unsigned long kernel_size,
+                                      unsigned long kernel_codesize,
+                                      unsigned long kernel_memsize,
+                                      u32 phys_seed);
+u32 efi_kaslr_get_phys_seed(efi_handle_t image_handle);
 
 asmlinkage efi_status_t __efiapi
 efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab);
diff --git a/drivers/firmware/efi/libstub/kaslr.c b/drivers/firmware/efi/libstub/kaslr.c
new file mode 100644 (file)
index 0000000..62d63f7
--- /dev/null
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helper functions used by the EFI stub on multiple
+ * architectures to deal with physical address space randomization.
+ */
+#include <linux/efi.h>
+
+#include "efistub.h"
+
+/**
+ * efi_kaslr_get_phys_seed() - Get random seed for physical kernel KASLR
+ * @image_handle:      Handle to the image
+ *
+ * If KASLR is not disabled, obtain a random seed using EFI_RNG_PROTOCOL
+ * that will be used to move the kernel physical mapping.
+ *
+ * Return:     the random seed
+ */
+u32 efi_kaslr_get_phys_seed(efi_handle_t image_handle)
+{
+       efi_status_t status;
+       u32 phys_seed;
+       efi_guid_t li_fixed_proto = LINUX_EFI_LOADED_IMAGE_FIXED_GUID;
+       void *p;
+
+       if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+               return 0;
+
+       if (efi_nokaslr) {
+               efi_info("KASLR disabled on kernel command line\n");
+       } else if (efi_bs_call(handle_protocol, image_handle,
+                              &li_fixed_proto, &p) == EFI_SUCCESS) {
+               efi_info("Image placement fixed by loader\n");
+       } else {
+               status = efi_get_random_bytes(sizeof(phys_seed),
+                                             (u8 *)&phys_seed);
+               if (status == EFI_SUCCESS) {
+                       return phys_seed;
+               } else if (status == EFI_NOT_FOUND) {
+                       efi_info("EFI_RNG_PROTOCOL unavailable\n");
+                       efi_nokaslr = true;
+               } else if (status != EFI_SUCCESS) {
+                       efi_err("efi_get_random_bytes() failed (0x%lx)\n",
+                               status);
+                       efi_nokaslr = true;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Distro versions of GRUB may ignore the BSS allocation entirely (i.e., fail
+ * to provide space, and fail to zero it). Check for this condition by double
+ * checking that the first and the last byte of the image are covered by the
+ * same EFI memory map entry.
+ */
+static bool check_image_region(u64 base, u64 size)
+{
+       struct efi_boot_memmap *map;
+       efi_status_t status;
+       bool ret = false;
+       int map_offset;
+
+       status = efi_get_memory_map(&map, false);
+       if (status != EFI_SUCCESS)
+               return false;
+
+       for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) {
+               efi_memory_desc_t *md = (void *)map->map + map_offset;
+               u64 end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE;
+
+               /*
+                * Find the region that covers base, and return whether
+                * it covers base+size bytes.
+                */
+               if (base >= md->phys_addr && base < end) {
+                       ret = (base + size) <= end;
+                       break;
+               }
+       }
+
+       efi_bs_call(free_pool, map);
+
+       return ret;
+}
+
+/**
+ * efi_kaslr_relocate_kernel() - Relocate the kernel (random if KASLR enabled)
+ * @image_addr: Pointer to the current kernel location
+ * @reserve_addr:      Pointer to the relocated kernel location
+ * @reserve_size:      Size of the relocated kernel
+ * @kernel_size:       Size of the text + data
+ * @kernel_codesize:   Size of the text
+ * @kernel_memsize:    Size of the text + data + bss
+ * @phys_seed:         Random seed used for the relocation
+ *
+ * If KASLR is not enabled, this function relocates the kernel to a fixed
+ * address (or leaves it at its current location). If KASLR is enabled, the
+ * kernel physical location is randomized using the seed passed in @phys_seed.
+ *
+ * Return:     status code, EFI_SUCCESS if relocation is successful
+ */
+efi_status_t efi_kaslr_relocate_kernel(unsigned long *image_addr,
+                                      unsigned long *reserve_addr,
+                                      unsigned long *reserve_size,
+                                      unsigned long kernel_size,
+                                      unsigned long kernel_codesize,
+                                      unsigned long kernel_memsize,
+                                      u32 phys_seed)
+{
+       efi_status_t status;
+       u64 min_kimg_align = efi_get_kimg_min_align();
+
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
+               /*
+                * If KASLR is enabled, and we have some randomness available,
+                * locate the kernel at a randomized offset in physical memory.
+                */
+               status = efi_random_alloc(*reserve_size, min_kimg_align,
+                                         reserve_addr, phys_seed,
+                                         EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
+               if (status != EFI_SUCCESS)
+                       efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
+       } else {
+               status = EFI_OUT_OF_RESOURCES;
+       }
+
+       if (status != EFI_SUCCESS) {
+               if (!check_image_region(*image_addr, kernel_memsize)) {
+                       efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n");
+               } else if (IS_ALIGNED(*image_addr, min_kimg_align) &&
+                          (unsigned long)_end < EFI_ALLOC_LIMIT) {
+                       /*
+                        * Just execute from wherever we were loaded by the
+                        * UEFI PE/COFF loader if the placement is suitable.
+                        */
+                       *reserve_size = 0;
+                       return EFI_SUCCESS;
+               }
+
+               status = efi_allocate_pages_aligned(*reserve_size, reserve_addr,
+                                                   ULONG_MAX, min_kimg_align,
+                                                   EFI_LOADER_CODE);
+
+               if (status != EFI_SUCCESS) {
+                       efi_err("Failed to relocate kernel\n");
+                       *reserve_size = 0;
+                       return status;
+               }
+       }
+
+       memcpy((void *)*reserve_addr, (void *)*image_addr, kernel_size);
+       *image_addr = *reserve_addr;
+       efi_icache_sync(*image_addr, *image_addr + kernel_codesize);
+       efi_remap_image(*image_addr, *reserve_size, kernel_codesize);
+
+       return status;
+}
index 145c9f0..c96d6dc 100644 (file)
@@ -30,32 +30,29 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                                 efi_loaded_image_t *image,
                                 efi_handle_t image_handle)
 {
-       unsigned long kernel_size = 0;
-       unsigned long preferred_addr;
+       unsigned long kernel_size, kernel_codesize, kernel_memsize;
        efi_status_t status;
 
        kernel_size = _edata - _start;
+       kernel_codesize = __init_text_end - _start;
+       kernel_memsize = kernel_size + (_end - _edata);
        *image_addr = (unsigned long)_start;
-       *image_size = kernel_size + (_end - _edata);
-
-       /*
-        * RISC-V kernel maps PAGE_OFFSET virtual address to the same physical
-        * address where kernel is booted. That's why kernel should boot from
-        * as low as possible to avoid wastage of memory. Currently, dram_base
-        * is occupied by the firmware. So the preferred address for kernel to
-        * boot is next aligned address. If preferred address is not available,
-        * relocate_kernel will fall back to efi_low_alloc_above to allocate
-        * lowest possible memory region as long as the address and size meets
-        * the alignment constraints.
-        */
-       preferred_addr = EFI_KIMG_PREFERRED_ADDRESS;
-       status = efi_relocate_kernel(image_addr, kernel_size, *image_size,
-                                    preferred_addr, efi_get_kimg_min_align(),
-                                    0x0);
+       *image_size = kernel_memsize;
+       *reserve_size = *image_size;
 
+       status = efi_kaslr_relocate_kernel(image_addr,
+                                          reserve_addr, reserve_size,
+                                          kernel_size, kernel_codesize, kernel_memsize,
+                                          efi_kaslr_get_phys_seed(image_handle));
        if (status != EFI_SUCCESS) {
                efi_err("Failed to relocate kernel\n");
                *image_size = 0;
        }
+
        return status;
 }
+
+void efi_icache_sync(unsigned long start, unsigned long end)
+{
+       asm volatile ("fence.i" ::: "memory");
+}
index 0a7264a..324e942 100644 (file)
@@ -575,6 +575,26 @@ static int zynq_gpio_set_wake(struct irq_data *data, unsigned int on)
        return 0;
 }
 
+static int zynq_gpio_irq_reqres(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+       int ret;
+
+       ret = pm_runtime_resume_and_get(chip->parent);
+       if (ret < 0)
+               return ret;
+
+       return gpiochip_reqres_irq(chip, d->hwirq);
+}
+
+static void zynq_gpio_irq_relres(struct irq_data *d)
+{
+       struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+       gpiochip_relres_irq(chip, d->hwirq);
+       pm_runtime_put(chip->parent);
+}
+
 /* irq chip descriptor */
 static const struct irq_chip zynq_gpio_level_irqchip = {
        .name           = DRIVER_NAME,
@@ -584,9 +604,10 @@ static const struct irq_chip zynq_gpio_level_irqchip = {
        .irq_unmask     = zynq_gpio_irq_unmask,
        .irq_set_type   = zynq_gpio_set_irq_type,
        .irq_set_wake   = zynq_gpio_set_wake,
+       .irq_request_resources = zynq_gpio_irq_reqres,
+       .irq_release_resources = zynq_gpio_irq_relres,
        .flags          = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED |
                          IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE,
-       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 static const struct irq_chip zynq_gpio_edge_irqchip = {
@@ -597,8 +618,9 @@ static const struct irq_chip zynq_gpio_edge_irqchip = {
        .irq_unmask     = zynq_gpio_irq_unmask,
        .irq_set_type   = zynq_gpio_set_irq_type,
        .irq_set_wake   = zynq_gpio_set_wake,
+       .irq_request_resources = zynq_gpio_irq_reqres,
+       .irq_release_resources = zynq_gpio_irq_relres,
        .flags          = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE,
-       GPIOCHIP_IRQ_RESOURCE_HELPERS,
 };
 
 static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio,
index df633e9..cdf6087 100644 (file)
@@ -442,9 +442,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
                        mem_info->local_mem_size_public,
                        mem_info->local_mem_size_private);
 
-       if (amdgpu_sriov_vf(adev))
-               mem_info->mem_clk_max = adev->clock.default_mclk / 100;
-       else if (adev->pm.dpm_enabled) {
+       if (adev->pm.dpm_enabled) {
                if (amdgpu_emu_mode == 1)
                        mem_info->mem_clk_max = 0;
                else
@@ -463,9 +461,7 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev)
 uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
 {
        /* the sclk is in quantas of 10kHz */
-       if (amdgpu_sriov_vf(adev))
-               return adev->clock.default_sclk / 100;
-       else if (adev->pm.dpm_enabled)
+       if (adev->pm.dpm_enabled)
                return amdgpu_dpm_get_sclk(adev, false) / 100;
        else
                return 100;
index 835980e..fb2681d 100644 (file)
@@ -217,6 +217,7 @@ union umc_info {
        struct atom_umc_info_v3_1 v31;
        struct atom_umc_info_v3_2 v32;
        struct atom_umc_info_v3_3 v33;
+       struct atom_umc_info_v4_0 v40;
 };
 
 union vram_info {
@@ -508,9 +509,8 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
 
        if (amdgpu_atom_parse_data_header(mode_info->atom_context,
                                index, &size, &frev, &crev, &data_offset)) {
+               umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
                if (frev == 3) {
-                       umc_info = (union umc_info *)
-                               (mode_info->atom_context->bios + data_offset);
                        switch (crev) {
                        case 1:
                                umc_config = le32_to_cpu(umc_info->v31.umc_config);
@@ -533,6 +533,20 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev)
                                /* unsupported crev */
                                return false;
                        }
+               } else if (frev == 4) {
+                       switch (crev) {
+                       case 0:
+                               umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
+                               ecc_default_enabled =
+                                       (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+                               break;
+                       default:
+                               /* unsupported crev */
+                               return false;
+                       }
+               } else {
+                       /* unsupported frev */
+                       return false;
                }
        }
 
index 49dd9aa..efdb1c4 100644 (file)
@@ -127,7 +127,6 @@ static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
 {
        struct drm_gem_object *gobj;
        unsigned long size;
-       int r;
 
        gobj = drm_gem_object_lookup(p->filp, data->handle);
        if (gobj == NULL)
@@ -137,23 +136,14 @@ static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
        drm_gem_object_put(gobj);
 
        size = amdgpu_bo_size(p->uf_bo);
-       if (size != PAGE_SIZE || (data->offset + 8) > size) {
-               r = -EINVAL;
-               goto error_unref;
-       }
+       if (size != PAGE_SIZE || data->offset > (size - 8))
+               return -EINVAL;
 
-       if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm)) {
-               r = -EINVAL;
-               goto error_unref;
-       }
+       if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
+               return -EINVAL;
 
        *offset = data->offset;
-
        return 0;
-
-error_unref:
-       amdgpu_bo_unref(&p->uf_bo);
-       return r;
 }
 
 static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
index e77f048..3f001a5 100644 (file)
@@ -885,13 +885,20 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
  */
 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
 {
+       int ret;
+
        amdgpu_asic_pre_asic_init(adev);
 
        if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
-           adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
-               return amdgpu_atomfirmware_asic_init(adev, true);
-       else
+           adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+               amdgpu_psp_wait_for_bootloader(adev);
+               ret = amdgpu_atomfirmware_asic_init(adev, true);
+               return ret;
+       } else {
                return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+       }
+
+       return 0;
 }
 
 /**
@@ -4694,9 +4701,12 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
        }
 
        if (ret)
-               dev_err(adev->dev, "GPU mode1 reset failed\n");
+               goto mode1_reset_failed;
 
        amdgpu_device_load_pci_state(adev->pdev);
+       ret = amdgpu_psp_wait_for_bootloader(adev);
+       if (ret)
+               goto mode1_reset_failed;
 
        /* wait for asic to come out of reset */
        for (i = 0; i < adev->usec_timeout; i++) {
@@ -4707,7 +4717,17 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
                udelay(1);
        }
 
+       if (i >= adev->usec_timeout) {
+               ret = -ETIMEDOUT;
+               goto mode1_reset_failed;
+       }
+
        amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+       return 0;
+
+mode1_reset_failed:
+       dev_err(adev->dev, "GPU mode1 reset failed\n");
        return ret;
 }
 
@@ -4849,7 +4869,7 @@ static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
        struct drm_device *dev = adev_to_drm(adev);
 
        ktime_get_ts64(&adev->reset_time);
-       dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
+       dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
                      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
 }
 #endif
index 74ffe65..7d5e7ad 100644 (file)
@@ -1390,6 +1390,7 @@ union gc_info {
        struct gc_info_v1_1 v1_1;
        struct gc_info_v1_2 v1_2;
        struct gc_info_v2_0 v2;
+       struct gc_info_v2_1 v2_1;
 };
 
 static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
@@ -1465,6 +1466,15 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
                adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
                        le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
                adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+               if (gc_info->v2.header.version_minor == 1) {
+                       adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
+                       adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
+                       adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
+                       adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc);
+                       adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc);
+                       adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc);
+                       adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */
+               }
                break;
        default:
                dev_err(adev->dev,
@@ -1478,6 +1488,7 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
 
 union mall_info {
        struct mall_info_v1_0 v1;
+       struct mall_info_v2_0 v2;
 };
 
 static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
@@ -1518,6 +1529,10 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
                adev->gmc.mall_size = mall_size;
                adev->gmc.m_half_use = half_use;
                break;
+       case 2:
+               mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
+               adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc;
+               break;
        default:
                dev_err(adev->dev,
                        "Unhandled MALL info table %d.%d\n",
index d20dd3f..363e6a2 100644 (file)
@@ -38,6 +38,8 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
@@ -532,11 +534,29 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
        return true;
 }
 
+static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file,
+                         unsigned int flags, unsigned int color,
+                         struct drm_clip_rect *clips, unsigned int num_clips)
+{
+
+       if (file)
+               return -ENOSYS;
+
+       return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips,
+                                        num_clips);
+}
+
 static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
        .destroy = drm_gem_fb_destroy,
        .create_handle = drm_gem_fb_create_handle,
 };
 
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
+       .destroy = drm_gem_fb_destroy,
+       .create_handle = drm_gem_fb_create_handle,
+       .dirty = amdgpu_dirtyfb
+};
+
 uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
                                          uint64_t bo_flags)
 {
@@ -1139,7 +1159,11 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
        if (ret)
                goto err;
 
-       ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+       if (drm_drv_uses_atomic_modeset(dev))
+               ret = drm_framebuffer_init(dev, &rfb->base,
+                                          &amdgpu_fb_funcs_atomic);
+       else
+               ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
 
        if (ret)
                goto err;
index a4ff515..395c176 100644 (file)
@@ -241,6 +241,9 @@ struct amdgpu_gfx_config {
        uint32_t gc_gl1c_per_sa;
        uint32_t gc_gl1c_size_per_instance;
        uint32_t gc_gl2c_per_gpu;
+       uint32_t gc_tcp_size_per_cu;
+       uint32_t gc_num_cu_per_sqc;
+       uint32_t gc_tcc_size;
 };
 
 struct amdgpu_cu_info {
index 8fdca54..429ef21 100644 (file)
@@ -2078,6 +2078,17 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
 }
 /* SECUREDISPLAY end */
 
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
+{
+       struct psp_context *psp = &adev->psp;
+       int ret = 0;
+
+       if (!amdgpu_sriov_vf(adev) && psp->funcs && psp->funcs->wait_for_bootloader != NULL)
+               ret = psp->funcs->wait_for_bootloader(psp);
+
+       return ret;
+}
+
 static int psp_hw_start(struct psp_context *psp)
 {
        struct amdgpu_device *adev = psp->adev;
index 3384eb9..3e67ed6 100644 (file)
@@ -109,6 +109,7 @@ enum psp_reg_prog_id {
 
 struct psp_funcs {
        int (*init_microcode)(struct psp_context *psp);
+       int (*wait_for_bootloader)(struct psp_context *psp);
        int (*bootloader_load_kdb)(struct psp_context *psp);
        int (*bootloader_load_spl)(struct psp_context *psp);
        int (*bootloader_load_sysdrv)(struct psp_context *psp);
@@ -533,4 +534,6 @@ int psp_spatial_partition(struct psp_context *psp, int mode);
 
 int is_psp_fw_valid(struct psp_bin_desc bin);
 
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+
 #endif
index 7689395..3c4600e 100644 (file)
@@ -764,7 +764,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        union ta_ras_cmd_input *info;
-       int ret = 0;
+       int ret;
 
        if (!con)
                return -EINVAL;
@@ -773,7 +773,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
        if (enable &&
            head->block != AMDGPU_RAS_BLOCK__GFX &&
            !amdgpu_ras_is_feature_allowed(adev, head))
-               goto out;
+               return 0;
 
        /* Only enable gfx ras feature from host side */
        if (head->block == AMDGPU_RAS_BLOCK__GFX &&
@@ -801,16 +801,16 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
                                enable ? "enable":"disable",
                                get_ras_block_str(head),
                                amdgpu_ras_is_poison_mode_supported(adev), ret);
-                       goto out;
+                       return ret;
                }
+
+               kfree(info);
        }
 
        /* setup the obj */
        __amdgpu_ras_feature_enable(adev, head, enable);
-out:
-       if (head->block == AMDGPU_RAS_BLOCK__GFX)
-               kfree(info);
-       return ret;
+
+       return 0;
 }
 
 /* Only used in device probe stage and called only once. */
@@ -2399,6 +2399,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
        if (amdgpu_sriov_vf(adev)) {
                switch (adev->ip_versions[MP0_HWIP][0]) {
                case IP_VERSION(13, 0, 2):
+               case IP_VERSION(13, 0, 6):
                        return true;
                default:
                        return false;
index 4764d21..595d5e5 100644 (file)
@@ -158,9 +158,10 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
        case IP_VERSION(11, 0, 7): /* Sienna cichlid */
        case IP_VERSION(13, 0, 0):
        case IP_VERSION(13, 0, 2): /* Aldebaran */
-       case IP_VERSION(13, 0, 6):
        case IP_VERSION(13, 0, 10):
                return true;
+       case IP_VERSION(13, 0, 6):
+               return (adev->gmc.is_app_apu) ? false : true;
        default:
                return false;
        }
index 57ed4e5..0a26a00 100644 (file)
@@ -203,6 +203,9 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev)
                if (adev->rev_id == 0) {
                        WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
                                              REDUCE_FIFO_DEPTH_BY_2, 2);
+               } else {
+                       WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2,
+                                               SPARE, 0x1);
                }
        }
 }
@@ -860,11 +863,15 @@ static int gfx_v9_4_3_sw_init(void *handle)
        if (r)
                return r;
 
-       r = amdgpu_gfx_sysfs_init(adev);
+       r = amdgpu_gfx_ras_sw_init(adev);
        if (r)
                return r;
 
-       return amdgpu_gfx_ras_sw_init(adev);
+
+       if (!amdgpu_sriov_vf(adev))
+               r = amdgpu_gfx_sysfs_init(adev);
+
+       return r;
 }
 
 static int gfx_v9_4_3_sw_fini(void *handle)
@@ -885,7 +892,8 @@ static int gfx_v9_4_3_sw_fini(void *handle)
        gfx_v9_4_3_mec_fini(adev);
        amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
        gfx_v9_4_3_free_microcode(adev);
-       amdgpu_gfx_sysfs_fini(adev);
+       if (!amdgpu_sriov_vf(adev))
+               amdgpu_gfx_sysfs_fini(adev);
 
        return 0;
 }
@@ -2219,15 +2227,6 @@ static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev,
                WREG32_SOC15(GC, GET_INST(GC, xcc_id),
                             regRLC_CGTT_MGCG_OVERRIDE, data);
 
-       def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL);
-
-       if (enable)
-               data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
-       else
-               data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
-
-       if (def != data)
-               WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data);
 }
 
 static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
@@ -4048,7 +4047,8 @@ static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev,
        uint32_t i;
        uint32_t data;
 
-       data = REG_SET_FIELD(0, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
+       data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG);
+       data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
                             amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0);
 
        if (amdgpu_watchdog_timer.timeout_fatal_disable &&
index 1561291..1de79d6 100644 (file)
@@ -360,8 +360,10 @@ static int jpeg_v4_0_3_hw_fini(void *handle)
 
        cancel_delayed_work_sync(&adev->jpeg.idle_work);
 
-       if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
-               ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+       if (!amdgpu_sriov_vf(adev)) {
+               if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+                       ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+       }
 
        return ret;
 }
index 9ea0723..f85eec0 100644 (file)
@@ -437,6 +437,24 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev)
                        XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK);
 
        }
+
+       if (!amdgpu_sriov_vf(adev)) {
+               u32 baco_cntl;
+               for_each_inst(i, adev->aid_mask) {
+                       baco_cntl = RREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL);
+                       if (baco_cntl & (BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+                                        BIF_BX0_BACO_CNTL__BACO_EN_MASK)) {
+                               baco_cntl &= ~(
+                                       BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+                                       BIF_BX0_BACO_CNTL__BACO_EN_MASK);
+                               dev_dbg(adev->dev,
+                                       "Unsetting baco dummy mode %x",
+                                       baco_cntl);
+                               WREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL,
+                                            baco_cntl);
+                       }
+               }
+       }
 }
 
 static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev)
index 10b17bd..469eed0 100644 (file)
@@ -133,12 +133,32 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp)
        return sol_reg != 0x0;
 }
 
-static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
 {
        struct amdgpu_device *adev = psp->adev;
+       int retry_loop, ret;
 
-       int ret;
-       int retry_loop;
+       for (retry_loop = 0; retry_loop < 70; retry_loop++) {
+               /* Wait for bootloader to signify that it is
+                  ready by having bit 31 of C2PMSG_33 set to 1 */
+               ret = psp_wait_for(
+                       psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33),
+                       0x80000000, 0xffffffff, false);
+
+               if (ret == 0)
+                       break;
+       }
+
+       if (ret)
+               dev_warn(adev->dev, "Bootloader wait timed out");
+
+       return ret;
+}
+
+static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+{
+       struct amdgpu_device *adev = psp->adev;
+       int retry_loop, ret;
 
        /* Wait for bootloader to signify that it is ready having bit 31 of
         * C2PMSG_35 set to 1. All other bits are expected to be cleared.
@@ -157,6 +177,19 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
        return ret;
 }
 
+static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
+{
+       struct amdgpu_device *adev = psp->adev;
+
+       if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) {
+               psp_v13_0_wait_for_vmbx_ready(psp);
+
+               return psp_v13_0_wait_for_bootloader(psp);
+       }
+
+       return 0;
+}
+
 static int psp_v13_0_bootloader_load_component(struct psp_context      *psp,
                                               struct psp_bin_desc      *bin_desc,
                                               enum psp_bootloader_cmd  bl_cmd)
@@ -714,6 +747,7 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
 
 static const struct psp_funcs psp_v13_0_funcs = {
        .init_microcode = psp_v13_0_init_microcode,
+       .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
        .bootloader_load_kdb = psp_v13_0_bootloader_load_kdb,
        .bootloader_load_spl = psp_v13_0_bootloader_load_spl,
        .bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv,
index c45721c..f5be40d 100644 (file)
@@ -559,8 +559,10 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
                 */
                if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
                        return AMD_RESET_METHOD_MODE2;
+               else if (!(adev->flags & AMD_IS_APU))
+                       return AMD_RESET_METHOD_MODE1;
                else
-                       return AMD_RESET_METHOD_NONE;
+                       return AMD_RESET_METHOD_MODE2;
        default:
                break;
        }
index f0731a6..830396b 100644 (file)
@@ -384,7 +384,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
                        default:
                                break;
                        }
-                       kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
+                       kfd_signal_event_interrupt(pasid, sq_int_data, 24);
                } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
                        kfd_set_dbg_ev_from_interrupt(dev, pasid,
                                KFD_DEBUG_DOORBELL_ID(context_id0),
index 2319467..0bbf0ed 100644 (file)
@@ -457,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
                mqd->is_occupied = kfd_is_occupied_cp;
                mqd->mqd_size = sizeof(struct v11_compute_mqd);
                mqd->get_wave_state = get_wave_state;
+               mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -472,6 +473,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
                mqd->destroy_mqd = destroy_hiq_mqd;
                mqd->is_occupied = kfd_is_occupied_cp;
                mqd->mqd_size = sizeof(struct v11_compute_mqd);
+               mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
@@ -501,6 +503,7 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
                mqd->destroy_mqd = kfd_destroy_mqd_sdma;
                mqd->is_occupied = kfd_is_occupied_sdma;
                mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+               mqd->mqd_stride = kfd_mqd_stride;
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
index 0115616..bb16b79 100644 (file)
@@ -1686,6 +1686,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                WRITE_ONCE(p->svms.faulting_task, NULL);
                if (r) {
                        pr_debug("failed %d to get svm range pages\n", r);
+                       if (r == -EBUSY)
+                               r = -EAGAIN;
                        goto unreserve_out;
                }
 
index 268cb99..88ba8b6 100644 (file)
@@ -65,6 +65,7 @@
 #include "amdgpu_dm_debugfs.h"
 #endif
 #include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_replay.h"
 
 #include "ivsrcid/ivsrcid_vislands30.h"
 
@@ -4265,6 +4266,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
        enum dc_connection_type new_connection_type = dc_connection_none;
        const struct dc_plane_cap *plane;
        bool psr_feature_enabled = false;
+       bool replay_feature_enabled = false;
        int max_overlay = dm->dc->caps.max_slave_planes;
 
        dm->display_indexes_num = dm->dc->caps.max_streams;
@@ -4374,6 +4376,20 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
                }
        }
 
+       if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
+               switch (adev->ip_versions[DCE_HWIP][0]) {
+               case IP_VERSION(3, 1, 4):
+               case IP_VERSION(3, 1, 5):
+               case IP_VERSION(3, 1, 6):
+               case IP_VERSION(3, 2, 0):
+               case IP_VERSION(3, 2, 1):
+                       replay_feature_enabled = true;
+                       break;
+               default:
+                       replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
+                       break;
+               }
+       }
        /* loops over all connectors on the board */
        for (i = 0; i < link_cnt; i++) {
                struct dc_link *link = NULL;
@@ -4422,6 +4438,12 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
                                amdgpu_dm_update_connector_after_detect(aconnector);
                                setup_backlight_device(dm, aconnector);
 
+                               /*
+                                * Disable psr if replay can be enabled
+                                */
+                               if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector))
+                                       psr_feature_enabled = false;
+
                                if (psr_feature_enabled)
                                        amdgpu_dm_set_psr_caps(link);
 
@@ -6004,7 +6026,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
        if (recalculate_timing)
                drm_mode_set_crtcinfo(&saved_mode, 0);
-       else
+       else if (!old_stream)
                drm_mode_set_crtcinfo(&mode, 0);
 
        /*
index 30d4c6f..97b7a0b 100644 (file)
@@ -29,6 +29,7 @@
 #include "dc.h"
 #include "amdgpu.h"
 #include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_replay.h"
 #include "amdgpu_dm_crtc.h"
 #include "amdgpu_dm_plane.h"
 #include "amdgpu_dm_trace.h"
@@ -123,7 +124,12 @@ static void vblank_control_worker(struct work_struct *work)
         * fill_dc_dirty_rects().
         */
        if (vblank_work->stream && vblank_work->stream->link) {
-               if (vblank_work->enable) {
+               /*
+                * Prioritize replay, instead of psr
+                */
+               if (vblank_work->stream->link->replay_settings.replay_feature_enabled)
+                       amdgpu_dm_replay_enable(vblank_work->stream, false);
+               else if (vblank_work->enable) {
                        if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
                            vblank_work->stream->link->psr_settings.psr_allow_active)
                                amdgpu_dm_psr_disable(vblank_work->stream);
@@ -132,6 +138,7 @@ static void vblank_control_worker(struct work_struct *work)
 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
                           !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) &&
 #endif
+                          vblank_work->stream->link->panel_config.psr.disallow_replay &&
                           vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
                        amdgpu_dm_psr_enable(vblank_work->stream);
                }
index 8eeca16..cc74dd6 100644 (file)
@@ -1269,6 +1269,13 @@ void amdgpu_dm_plane_handle_cursor_update(struct drm_plane *plane,
        attributes.rotation_angle    = 0;
        attributes.attribute_flags.value = 0;
 
+       /* Enable cursor degamma ROM on DCN3+ for implicit sRGB degamma in DRM
+        * legacy gamma setup.
+        */
+       if (crtc_state->cm_is_degamma_srgb &&
+           adev->dm.dc->caps.color.dpp.gamma_corr)
+               attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1;
+
        attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
 
        if (crtc_state->stream) {
@@ -1468,6 +1475,15 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
                drm_plane_create_blend_mode_property(plane, blend_caps);
        }
 
+       if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
+               drm_plane_create_zpos_immutable_property(plane, 0);
+       } else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
+               unsigned int zpos = 1 + drm_plane_index(plane);
+               drm_plane_create_zpos_property(plane, zpos, 1, 254);
+       } else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+               drm_plane_create_zpos_immutable_property(plane, 255);
+       }
+
        if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
            plane_cap &&
            (plane_cap->pixel_format_support.nv12 ||
index 69ffd44..1b8c2ae 100644 (file)
@@ -78,3 +78,4 @@ DC_EDID += dc_edid_parser.o
 AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB))
 AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID))
 AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID)
+
index 3e0da87..1042cf1 100644 (file)
@@ -32,6 +32,7 @@
 
 #define MAX_INSTANCE                                        6
 #define MAX_SEGMENT                                         6
+#define SMU_REGISTER_WRITE_RETRY_COUNT                      5
 
 struct IP_BASE_INSTANCE {
     unsigned int segment[MAX_SEGMENT];
@@ -132,6 +133,8 @@ static int dcn315_smu_send_msg_with_param(
                unsigned int msg_id, unsigned int param)
 {
        uint32_t result;
+       uint32_t i = 0;
+       uint32_t read_back_data;
 
        result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
 
@@ -148,10 +151,19 @@ static int dcn315_smu_send_msg_with_param(
        /* Set the parameter register for the SMU message, unit is Mhz */
        REG_WRITE(MP1_SMN_C2PMSG_37, param);
 
-       /* Trigger the message transaction by writing the message ID */
-       generic_write_indirect_reg(CTX,
-               REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
-               mmMP1_C2PMSG_3, msg_id);
+       for (i = 0; i < SMU_REGISTER_WRITE_RETRY_COUNT; i++) {
+               /* Trigger the message transaction by writing the message ID */
+               generic_write_indirect_reg(CTX,
+                       REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
+                       mmMP1_C2PMSG_3, msg_id);
+               read_back_data = generic_read_indirect_reg(CTX,
+                       REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
+                       mmMP1_C2PMSG_3);
+               if (read_back_data == msg_id)
+                       break;
+               udelay(2);
+               smu_print("SMU msg id write fail %x times. \n", i + 1);
+       }
 
        result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
 
index 566d704..3a9077b 100644 (file)
@@ -2073,12 +2073,12 @@ enum dc_status dc_commit_streams(struct dc *dc,
                }
        }
 
-       /* Check for case where we are going from odm 2:1 to max
-        *  pipe scenario.  For these cases, we will call
-        *  commit_minimal_transition_state() to exit out of odm 2:1
-        *  first before processing new streams
+       /* ODM Combine 2:1 power optimization is only applied in the single-stream
+        * scenario; it uses more pipes than needed to reduce power consumption.
+        * We need to switch off this feature to make room for new streams.
         */
-       if (stream_count == dc->res_pool->pipe_count) {
+       if (stream_count > dc->current_state->stream_count &&
+                       dc->current_state->stream_count == 1) {
                for (i = 0; i < dc->res_pool->pipe_count; i++) {
                        pipe = &dc->current_state->res_ctx.pipe_ctx[i];
                        if (pipe->next_odm_pipe)
@@ -3501,6 +3501,45 @@ static void commit_planes_for_stream_fast(struct dc *dc,
                top_pipe_to_program->stream->update_flags.raw = 0;
 }
 
+/*
+ * Drain pending double-buffered hardware updates for every pipe in
+ * @dc_context that has a stream attached.
+ *
+ * Meant to run as a preamble to full-update programming, before any HW
+ * lock is asserted. For each such pipe the optional TG hook
+ * wait_drr_doublebuffer_pending_clear is called; then, if the pipe has a
+ * HUBP, the first OPP that still flags an MPCC disconnect for that HUBP
+ * instance is waited on (MPC wait_for_idle) and its pending flag is cleared.
+ */
+static void wait_for_outstanding_hw_updates(struct dc *dc, const struct dc_state *dc_context)
+{
+       const struct pipe_ctx *cur_pipe;
+       struct hubp *pipe_hubp;
+       int opp_total = dc->res_pool->pipe_count;
+       int pipe_no;
+       int opp_no;
+       int mpcc_id;
+
+       for (pipe_no = 0; pipe_no < dc->res_pool->pipe_count; pipe_no++) {
+               cur_pipe = &dc_context->res_ctx.pipe_ctx[pipe_no];
+               if (!cur_pipe->stream)
+                       continue;
+
+               /* The DRR double-buffer wait is an optional hook on stream_res.tg. */
+               if (cur_pipe->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
+                       cur_pipe->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(cur_pipe->stream_res.tg);
+
+               pipe_hubp = cur_pipe->plane_res.hubp;
+               if (!pipe_hubp)
+                       continue;
+
+               /* MPCC inst is equal to pipe index in practice */
+               mpcc_id = pipe_hubp->inst;
+               for (opp_no = 0; opp_no < opp_total; opp_no++) {
+                       if (!dc->res_pool->opps[opp_no]->mpcc_disconnect_pending[mpcc_id])
+                               continue;
+
+                       /* Only the first OPP with a pending disconnect is handled. */
+                       dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_id);
+                       dc->res_pool->opps[opp_no]->mpcc_disconnect_pending[mpcc_id] = false;
+                       break;
+               }
+       }
+}
+
 static void commit_planes_for_stream(struct dc *dc,
                struct dc_surface_update *srf_updates,
                int surface_count,
@@ -3519,24 +3558,9 @@ static void commit_planes_for_stream(struct dc *dc,
        // dc->current_state anymore, so we have to cache it before we apply
        // the new SubVP context
        subvp_prev_use = false;
-
-
        dc_z10_restore(dc);
-
-       if (update_type == UPDATE_TYPE_FULL) {
-               /* wait for all double-buffer activity to clear on all pipes */
-               int pipe_idx;
-
-               for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
-                       struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
-
-                       if (!pipe_ctx->stream)
-                               continue;
-
-                       if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
-                               pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg);
-               }
-       }
+       if (update_type == UPDATE_TYPE_FULL)
+               wait_for_outstanding_hw_updates(dc, context);
 
        if (update_type == UPDATE_TYPE_FULL) {
                dc_allow_idle_optimizations(dc, false);
index 65fa9e2..e72f15a 100644 (file)
@@ -1106,29 +1106,6 @@ void dcn20_blank_pixel_data(
                        v_active,
                        offset);
 
-       if (!blank && dc->debug.enable_single_display_2to1_odm_policy) {
-               /* when exiting dynamic ODM need to reinit DPG state for unused pipes */
-               struct pipe_ctx *old_odm_pipe = dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx].next_odm_pipe;
-
-               odm_pipe = pipe_ctx->next_odm_pipe;
-
-               while (old_odm_pipe) {
-                       if (!odm_pipe || old_odm_pipe->pipe_idx != odm_pipe->pipe_idx)
-                               dc->hwss.set_disp_pattern_generator(dc,
-                                               old_odm_pipe,
-                                               CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
-                                               CONTROLLER_DP_COLOR_SPACE_UDEFINED,
-                                               COLOR_DEPTH_888,
-                                               NULL,
-                                               0,
-                                               0,
-                                               0);
-                       old_odm_pipe = old_odm_pipe->next_odm_pipe;
-                       if (odm_pipe)
-                               odm_pipe = odm_pipe->next_odm_pipe;
-               }
-       }
-
        if (!blank)
                if (stream_res->abm) {
                        dc->hwss.set_pipe(pipe_ctx);
@@ -1584,17 +1561,6 @@ static void dcn20_update_dchubp_dpp(
                        || plane_state->update_flags.bits.global_alpha_change
                        || plane_state->update_flags.bits.per_pixel_alpha_change) {
                // MPCC inst is equal to pipe index in practice
-               int mpcc_inst = hubp->inst;
-               int opp_inst;
-               int opp_count = dc->res_pool->pipe_count;
-
-               for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
-                       if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
-                               dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
-                               dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
-                               break;
-                       }
-               }
                hws->funcs.update_mpcc(dc, pipe_ctx);
        }
 
@@ -1722,11 +1688,16 @@ static void dcn20_program_pipe(
                struct dc_state *context)
 {
        struct dce_hwseq *hws = dc->hwseq;
-       /* Only need to unblank on top pipe */
 
-       if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.abm_level)
-                       && !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe)
-               hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible);
+       /* Only need to unblank on top pipe */
+       if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) {
+               if (pipe_ctx->update_flags.bits.enable ||
+                               pipe_ctx->update_flags.bits.odm ||
+                               pipe_ctx->stream->update_flags.bits.abm_level)
+                       hws->funcs.blank_pixel_data(dc, pipe_ctx,
+                                       !pipe_ctx->plane_state ||
+                                       !pipe_ctx->plane_state->visible);
+       }
 
        /* Only update TG on top pipe */
        if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
index 6cef62d..255713e 100644 (file)
@@ -987,3 +987,20 @@ void dcn30_prepare_bandwidth(struct dc *dc,
        }
 }
 
+/*
+ * Program static-screen control on @num_pipes pipes.
+ *
+ * Builds a trigger mask from @params->triggers (surface_update -> 0x100,
+ * cursor_update -> 0x8, force_trigger -> 0x1) and passes it, together with
+ * @params->num_frames, to the set_static_screen_control hook of each
+ * pipe's stream_res.tg.
+ */
+void dcn30_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+               int num_pipes, const struct dc_static_screen_params *params)
+{
+       unsigned int trigger_mask;
+       unsigned int idx = 0;
+
+       trigger_mask = (params->triggers.surface_update ? 0x100u : 0u) |
+                      (params->triggers.cursor_update ? 0x8u : 0u) |
+                      (params->triggers.force_trigger ? 0x1u : 0u);
+
+       /* idx is unsigned, so num_pipes is compared as unsigned here. */
+       while (idx < num_pipes) {
+               pipe_ctx[idx]->stream_res.tg->funcs->set_static_screen_control(
+                               pipe_ctx[idx]->stream_res.tg,
+                               trigger_mask, params->num_frames);
+               idx++;
+       }
+}
index a24a8e3..ce19c54 100644 (file)
@@ -87,5 +87,7 @@ void dcn30_set_hubp_blank(const struct dc *dc,
 void dcn30_prepare_bandwidth(struct dc *dc,
        struct dc_state *context);
 
+void dcn30_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+               int num_pipes, const struct dc_static_screen_params *params);
 
 #endif /* __DC_HWSS_DCN30_H__ */
index 3d19aca..0de8b27 100644 (file)
@@ -64,7 +64,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = {
        .update_bandwidth = dcn20_update_bandwidth,
        .set_drr = dcn10_set_drr,
        .get_position = dcn10_get_position,
-       .set_static_screen_control = dcn10_set_static_screen_control,
+       .set_static_screen_control = dcn30_set_static_screen_control,
        .setup_stereo = dcn10_setup_stereo,
        .set_avmute = dcn30_set_avmute,
        .log_hw_state = dcn10_log_hw_state,
index 257df86..61205cd 100644 (file)
@@ -75,6 +75,7 @@ static const struct hw_sequencer_funcs dcn301_funcs = {
        .get_hw_state = dcn10_get_hw_state,
        .clear_status_bits = dcn10_clear_status_bits,
        .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+       .edp_backlight_control = dce110_edp_backlight_control,
        .edp_power_control = dce110_edp_power_control,
        .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
        .set_cursor_position = dcn10_set_cursor_position,
index fc25cc3..1d7bc1e 100644 (file)
@@ -67,7 +67,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
        .update_bandwidth = dcn20_update_bandwidth,
        .set_drr = dcn10_set_drr,
        .get_position = dcn10_get_position,
-       .set_static_screen_control = dcn10_set_static_screen_control,
+       .set_static_screen_control = dcn30_set_static_screen_control,
        .setup_stereo = dcn10_setup_stereo,
        .set_avmute = dcn30_set_avmute,
        .log_hw_state = dcn10_log_hw_state,
index ca8fe55..4ef85c3 100644 (file)
@@ -69,7 +69,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
        .update_bandwidth = dcn20_update_bandwidth,
        .set_drr = dcn10_set_drr,
        .get_position = dcn10_get_position,
-       .set_static_screen_control = dcn10_set_static_screen_control,
+       .set_static_screen_control = dcn30_set_static_screen_control,
        .setup_stereo = dcn10_setup_stereo,
        .set_avmute = dcn30_set_avmute,
        .log_hw_state = dcn10_log_hw_state,
index 777b2fa..c741714 100644 (file)
@@ -65,7 +65,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
        .update_bandwidth = dcn20_update_bandwidth,
        .set_drr = dcn10_set_drr,
        .get_position = dcn10_get_position,
-       .set_static_screen_control = dcn10_set_static_screen_control,
+       .set_static_screen_control = dcn30_set_static_screen_control,
        .setup_stereo = dcn10_setup_stereo,
        .set_avmute = dcn30_set_avmute,
        .log_hw_state = dcn10_log_hw_state,
index 935cd23..f9d601c 100644 (file)
@@ -2564,18 +2564,128 @@ static int find_optimal_free_pipe_as_secondary_dpp_pipe(
        return free_pipe_idx;
 }
 
+/*
+ * Pick an unused pipe in @res_ctx to act as a secondary pipe for
+ * @primary_pipe.
+ *
+ * A pipe counts as unused when its stream pointer is NULL. When
+ * @primary_pipe is given, the mirrored index
+ * (pool->pipe_count - 1 - primary_pipe->pipe_idx) is tried first; if that
+ * slot is not usable, or @primary_pipe is NULL, the pool is scanned from
+ * the highest index downwards.
+ *
+ * Returns the selected pipe_ctx with its pipe_idx field filled in, or NULL
+ * when no suitable pipe exists.
+ */
+static struct pipe_ctx *find_idle_secondary_pipe_check_mpo(
+               struct resource_context *res_ctx,
+               const struct resource_pool *pool,
+               const struct pipe_ctx *primary_pipe)
+{
+       int i;
+       struct pipe_ctx *secondary_pipe = NULL;
+       struct pipe_ctx *next_odm_mpo_pipe = NULL;
+       int primary_index, preferred_pipe_idx;
+       struct pipe_ctx *old_primary_pipe = NULL;
+
+       /*
+        * Modified from find_idle_secondary_pipe
+        * With windowed MPO and ODM, we want to avoid the case where we want a
+        *  free pipe for the left side but the free pipe is being used on the
+        *  right side.
+        * Add check on current_state if the primary_pipe is the left side,
+        *  to check the right side ( primary_pipe->next_odm_pipe ) to see if
+        *  it is using a pipe for MPO ( primary_pipe->next_odm_pipe->bottom_pipe )
+        * - If so, then don't use this pipe
+        * EXCEPTION - 3 plane ( 2 MPO plane ) case
+        * - in this case, the primary pipe has already gotten a free pipe for the
+        *  MPO window in the left
+        * - when it tries to get a free pipe for the MPO window on the right,
+        *  it will see that it is already assigned to the right side
+        *  ( primary_pipe->next_odm_pipe ).  But in this case, we want this
+        *  free pipe, since it will be for the right side.  So add an
+        *  additional condition, that skipping the free pipe on the right only
+        *  applies if the primary pipe has no bottom pipe currently assigned
+        */
+       if (primary_pipe) {
+               primary_index = primary_pipe->pipe_idx;
+               /* NOTE(review): assumes primary_pipe->stream is non-NULL - confirm with callers */
+               old_primary_pipe = &primary_pipe->stream->ctx->dc->current_state->res_ctx.pipe_ctx[primary_index];
+               /* Remember the pipe to avoid; it stays NULL in the exception case above. */
+               if ((old_primary_pipe->next_odm_pipe) && (old_primary_pipe->next_odm_pipe->bottom_pipe)
+                       && (!primary_pipe->bottom_pipe))
+                       next_odm_mpo_pipe = old_primary_pipe->next_odm_pipe->bottom_pipe;
+
+               /* First choice: the slot mirrored around the middle of the pool. */
+               preferred_pipe_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx;
+               if ((res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) &&
+                       !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == preferred_pipe_idx)) {
+                       secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx];
+                       secondary_pipe->pipe_idx = preferred_pipe_idx;
+               }
+       }
+
+       /*
+        * search backwards for the second pipe to keep pipe
+        * assignment more consistent
+        */
+       if (!secondary_pipe)
+               for (i = pool->pipe_count - 1; i >= 0; i--) {
+                       /* Same acceptance test as for the preferred slot. */
+                       if ((res_ctx->pipe_ctx[i].stream == NULL) &&
+                               !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == i)) {
+                               secondary_pipe = &res_ctx->pipe_ctx[i];
+                               secondary_pipe->pipe_idx = i;
+                               break;
+                       }
+               }
+
+       return secondary_pipe;
+}
+
+/*
+ * Acquire an idle pipe in @state to be used as an additional layer under
+ * @head_pipe.
+ *
+ * Modified from dcn20_acquire_idle_pipe_for_layer. If @head_pipe already
+ * had a bottom pipe in the current (old) context and that pipe index is
+ * still unused in @state, it is reused; otherwise
+ * find_idle_secondary_pipe_check_mpo() selects one.
+ *
+ * The chosen pipe takes over @head_pipe's stream, stream_res.tg and
+ * stream_res.opp, and receives the HUBP/IPP/DPP resources that @pool holds
+ * for its own pipe index.
+ *
+ * Returns the acquired pipe, or NULL when @head_pipe is NULL or no idle
+ * pipe is available.
+ */
+static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
+               struct dc_state *state,
+               const struct resource_pool *pool,
+               struct dc_stream_state *stream,
+               const struct pipe_ctx *head_pipe)
+{
+       struct resource_context *res_ctx = &state->res_ctx;
+       struct pipe_ctx *idle_pipe, *pipe;
+       struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx;
+       int head_index;
+
+       /*
+        * Do not rely on ASSERT to stop execution: bail out before
+        * head_pipe is dereferenced below.
+        */
+       if (!head_pipe) {
+               ASSERT(0);
+               return NULL;
+       }
+
+       /*
+        * Check if head_pipe in old_context already has bottom_pipe allocated.
+        * - If so, check if that pipe is available in the current context.
+        * --  If so, reuse pipe from old_context
+        */
+       head_index = head_pipe->pipe_idx;
+       pipe = &old_ctx->pipe_ctx[head_index];
+       if (pipe->bottom_pipe && res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx].stream == NULL) {
+               idle_pipe = &res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx];
+               idle_pipe->pipe_idx = pipe->bottom_pipe->pipe_idx;
+       } else {
+               idle_pipe = find_idle_secondary_pipe_check_mpo(res_ctx, pool, head_pipe);
+               if (!idle_pipe)
+                       return NULL;
+       }
+
+       /* Stream-level resources are shared with the head pipe. */
+       idle_pipe->stream = head_pipe->stream;
+       idle_pipe->stream_res.tg = head_pipe->stream_res.tg;
+       idle_pipe->stream_res.opp = head_pipe->stream_res.opp;
+
+       /* Plane-level resources come from the pool slot matching the pipe index. */
+       idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx];
+       idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx];
+       idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx];
+       idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst;
+
+       return idle_pipe;
+}
+
 struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe(
                const struct dc_state *cur_ctx,
                struct dc_state *new_ctx,
                const struct resource_pool *pool,
                const struct pipe_ctx *opp_head_pipe)
 {
-       int free_pipe_idx =
-                       find_optimal_free_pipe_as_secondary_dpp_pipe(
-                                       &cur_ctx->res_ctx, &new_ctx->res_ctx,
-                                       pool, opp_head_pipe);
+
+       int free_pipe_idx;
        struct pipe_ctx *free_pipe;
 
+       if (!opp_head_pipe->stream->ctx->dc->config.enable_windowed_mpo_odm)
+               return dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
+                               new_ctx, pool, opp_head_pipe->stream, opp_head_pipe);
+
+       free_pipe_idx = find_optimal_free_pipe_as_secondary_dpp_pipe(
+                                       &cur_ctx->res_ctx, &new_ctx->res_ctx,
+                                       pool, opp_head_pipe);
        if (free_pipe_idx >= 0) {
                free_pipe = &new_ctx->res_ctx.pipe_ctx[free_pipe_idx];
                free_pipe->pipe_idx = free_pipe_idx;
index 8afda5e..5805fb0 100644 (file)
@@ -1099,6 +1099,11 @@ void dcn20_calculate_dlg_params(struct dc *dc,
                context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
                                                pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
                context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+               if (dc->ctx->dce_version < DCN_VERSION_3_1 &&
+                   context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+                       dcn20_adjust_freesync_v_startup(
+                               &context->res_ctx.pipe_ctx[i].stream->timing,
+                               &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
 
                pipe_idx++;
        }
@@ -1927,7 +1932,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co
        int vlevel = 0;
        int pipe_split_from[MAX_PIPES];
        int pipe_cnt = 0;
-       int i = 0;
        display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
        DC_LOGGER_INIT(dc->ctx->logger);
 
@@ -1951,15 +1955,6 @@ static bool dcn20_validate_bandwidth_internal(struct dc *dc, struct dc_state *co
        dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
        dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
 
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (!context->res_ctx.pipe_ctx[i].stream)
-                       continue;
-               if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
-                       dcn20_adjust_freesync_v_startup(
-                               &context->res_ctx.pipe_ctx[i].stream->timing,
-                               &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
-       }
-
        BW_VAL_TRACE_END_WATERMARKS();
 
        goto validate_out;
@@ -2232,7 +2227,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
        int vlevel = 0;
        int pipe_split_from[MAX_PIPES];
        int pipe_cnt = 0;
-       int i = 0;
        display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
        DC_LOGGER_INIT(dc->ctx->logger);
 
@@ -2261,15 +2255,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc,
        dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
        dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
 
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (!context->res_ctx.pipe_ctx[i].stream)
-                       continue;
-               if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
-                       dcn20_adjust_freesync_v_startup(
-                               &context->res_ctx.pipe_ctx[i].stream->timing,
-                               &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
-       }
-
        BW_VAL_TRACE_END_WATERMARKS();
 
        goto validate_out;
index 07adb61..fb21572 100644 (file)
@@ -293,6 +293,17 @@ static unsigned int micro_sec_to_vert_lines(unsigned int num_us, struct dc_crtc_
        return num_lines;
 }
 
+/*
+ * Return the vertical back porch of @timing: what is left of the vertical
+ * blank (v_total minus the active region) once the front porch and the
+ * sync width are taken away.
+ */
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+       /* Active region = addressable lines plus both borders. */
+       const unsigned int active_lines = timing->v_border_top +
+                                         timing->v_addressable +
+                                         timing->v_border_bottom;
+       const unsigned int blank_lines = timing->v_total - active_lines;
+
+       return blank_lines - timing->v_front_porch - timing->v_sync_width;
+}
+
 int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
                                               display_e2e_pipe_params_st *pipes,
                                               bool fast_validate)
@@ -310,6 +321,7 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
        for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
                struct dc_crtc_timing *timing;
                unsigned int num_lines = 0;
+               unsigned int v_back_porch = 0;
 
                if (!res_ctx->pipe_ctx[i].stream)
                        continue;
@@ -323,9 +335,16 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
                else
                        pipes[pipe_cnt].pipe.dest.vtotal = timing->v_total;
 
+               v_back_porch  = get_vertical_back_porch(timing);
+
                pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
                pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
-               pipes[pipe_cnt].pipe.dest.vblank_nom = max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width);
+               // vblank_nom should not be smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+               // + 2 is because
+               // 1 -> VStartup_start should be 1 line before VSync
+               // 1 -> always reserve 1 line between start of vblank to vstartup signal
+               pipes[pipe_cnt].pipe.dest.vblank_nom =
+                       max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
                pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
 
                if (pipe->plane_state &&
index dbd6081..ef3a674 100644 (file)
@@ -338,7 +338,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
                 *  - Delta for CEIL: delta_from_mid_point_in_us_1
                 *  - Delta for FLOOR: delta_from_mid_point_in_us_2
                 */
-               if ((last_render_time_in_us / mid_point_frames_ceil) < in_out_vrr->min_duration_in_us) {
+               if (mid_point_frames_ceil &&
+                   (last_render_time_in_us / mid_point_frames_ceil) <
+                   in_out_vrr->min_duration_in_us) {
                        /* Check for out of range.
                         * If using CEIL produces a value that is out of range,
                         * then we are forced to use FLOOR.
@@ -385,8 +387,9 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
                /* Either we've calculated the number of frames to insert,
                 * or we need to insert min duration frames
                 */
-               if (last_render_time_in_us / frames_to_insert <
-                               in_out_vrr->min_duration_in_us){
+               if (frames_to_insert &&
+                   (last_render_time_in_us / frames_to_insert) <
+                   in_out_vrr->min_duration_in_us){
                        frames_to_insert -= (frames_to_insert > 1) ?
                                        1 : 0;
                }
index abe829b..67d7b7e 100644 (file)
@@ -240,6 +240,7 @@ enum DC_FEATURE_MASK {
        DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default
        DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default
        DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default
+       DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4
 };
 
 enum DC_DEBUG_MASK {
@@ -250,6 +251,7 @@ enum DC_DEBUG_MASK {
        DC_DISABLE_PSR = 0x10,
        DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
        DC_DISABLE_MPO = 0x40,
+       DC_DISABLE_REPLAY = 0x100, /* needs its own bit: 0x50 aliased DC_DISABLE_PSR | DC_DISABLE_MPO */
        DC_ENABLE_DPIA_TRACE = 0x80,
 };
 
index e68c1e2..fa7d6ce 100644 (file)
@@ -3117,6 +3117,24 @@ enum atom_umc_config1_def {
        UMC_CONFIG1__ENABLE_ECC_CAPABLE = 0x00010000,
 };
 
+struct atom_umc_info_v4_0 {
+       struct atom_common_table_header table_header;
+       uint32_t ucode_reserved[5];
+       uint8_t umcip_min_ver;
+       uint8_t umcip_max_ver;
+       uint8_t vram_type;
+       uint8_t umc_config;
+       uint32_t mem_refclk_10khz;
+       uint32_t clk_reserved[4];
+       uint32_t golden_reserved;
+       uint32_t umc_config1;
+       uint32_t reserved[2];
+       uint8_t channel_num;
+       uint8_t channel_width;
+       uint8_t channel_reserve[2];
+       uint8_t umc_info_reserved[16];
+};
+
 /* 
   ***************************************************************************
     Data Table vram_info  structure
index f43e297..7a9d473 100644 (file)
@@ -30,7 +30,7 @@
 #define GC_TABLE_ID                     0x4347
 #define HARVEST_TABLE_SIGNATURE         0x56524148
 #define VCN_INFO_TABLE_ID               0x004E4356
-#define MALL_INFO_TABLE_ID              0x4D414C4C
+#define MALL_INFO_TABLE_ID              0x4C4C414D
 
 typedef enum
 {
@@ -280,6 +280,36 @@ struct gc_info_v2_0 {
        uint32_t gc_num_packer_per_sc;
 };
 
+struct gc_info_v2_1 {
+       struct gpu_info_header header;
+
+       uint32_t gc_num_se;
+       uint32_t gc_num_cu_per_sh;
+       uint32_t gc_num_sh_per_se;
+       uint32_t gc_num_rb_per_se;
+       uint32_t gc_num_tccs;
+       uint32_t gc_num_gprs;
+       uint32_t gc_num_max_gs_thds;
+       uint32_t gc_gs_table_depth;
+       uint32_t gc_gsprim_buff_depth;
+       uint32_t gc_parameter_cache_depth;
+       uint32_t gc_double_offchip_lds_buffer;
+       uint32_t gc_wave_size;
+       uint32_t gc_max_waves_per_simd;
+       uint32_t gc_max_scratch_slots_per_cu;
+       uint32_t gc_lds_size;
+       uint32_t gc_num_sc_per_se;
+       uint32_t gc_num_packer_per_sc;
+       /* new for v2_1 */
+       uint32_t gc_num_tcp_per_sh;
+       uint32_t gc_tcp_size_per_cu;
+       uint32_t gc_num_sdp_interface;
+       uint32_t gc_num_cu_per_sqc;
+       uint32_t gc_instruction_cache_size_per_sqc;
+       uint32_t gc_scalar_data_cache_size_per_sqc;
+       uint32_t gc_tcc_size;
+};
+
 typedef struct harvest_info_header {
        uint32_t signature; /* Table Signature */
        uint32_t version;   /* Table Version */
@@ -312,6 +342,12 @@ struct mall_info_v1_0 {
        uint32_t reserved[5];
 };
 
+struct mall_info_v2_0 {
+       struct mall_info_header header;
+       uint32_t mall_size_per_umc;
+       uint32_t reserved[8];
+};
+
 #define VCN_INFO_TABLE_MAX_NUM_INSTANCES 4
 
 struct vcn_info_header {
index 5b1d73b..41147da 100644 (file)
@@ -3311,8 +3311,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
            (gc_ver != IP_VERSION(9, 4, 3)) &&
            (attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
             attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+            attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
             attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
-            attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+            attr == &sensor_dev_attr_temp3_label.dev_attr.attr ||
+            attr == &sensor_dev_attr_temp3_crit.dev_attr.attr))
                return 0;
 
        /* hotspot temperature for gc 9,4,3*/
@@ -3324,9 +3326,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
        /* only SOC15 dGPUs support hotspot and mem temperatures */
        if (((adev->flags & AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0) ||
            (gc_ver == IP_VERSION(9, 4, 3))) &&
-           (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
-            attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
-            attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+            (attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
             attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
             attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
             attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
@@ -3471,6 +3471,9 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
        size = sizeof(uint32_t);
        if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size))
                seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
+       size = sizeof(uint32_t);
+       if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size))
+               seq_printf(m, "\t%u.%u W (current GPU)\n", query >> 8, query & 0xff);
        size = sizeof(value);
        seq_printf(m, "\n");
 
index 95eb8a5..5a52098 100644 (file)
@@ -1031,10 +1031,7 @@ struct pptable_funcs {
                                                   enum smu_feature_mask mask);
 
        /**
-        * @notify_display_change: Enable fast memory clock switching.
-        *
-        * Allows for fine grained memory clock switching but has more stringent
-        * timing requirements.
+        * @notify_display_change: General interface call to let SMU know about DC change
         */
        int (*notify_display_change)(struct smu_context *smu);
 
index 10cff75..e2ee855 100644 (file)
 #define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
 #define PPSMC_MSG_SetPriorityDeltaGain           0x4B
 #define PPSMC_MSG_AllowIHHostInterrupt           0x4C
-#define PPSMC_Message_Count                      0x4D
+
+#define PPSMC_MSG_DALNotPresent                  0x4E
+
+#define PPSMC_Message_Count                      0x4F
 
 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage                    0x1
index 252aef1..9be4051 100644 (file)
@@ -123,7 +123,7 @@ typedef enum {
   VOLTAGE_GUARDBAND_COUNT
 } GFX_GUARDBAND_e;
 
-#define SMU_METRICS_TABLE_VERSION 0x5
+#define SMU_METRICS_TABLE_VERSION 0x7
 
 typedef struct __attribute__((packed, aligned(4))) {
   uint32_t AccumulationCounter;
@@ -198,7 +198,7 @@ typedef struct __attribute__((packed, aligned(4))) {
   uint32_t SocketThmResidencyAcc;
   uint32_t VrThmResidencyAcc;
   uint32_t HbmThmResidencyAcc;
-  uint32_t spare;
+  uint32_t GfxLockXCDMak;
 
   // New Items at end to maintain driver compatibility
   uint32_t GfxclkFrequency[8];
index ae4f44c..70a4a71 100644 (file)
 #define PPSMC_MSG_GetMinGfxDpmFreq                  0x32
 #define PPSMC_MSG_GetMaxGfxDpmFreq                  0x33
 #define PPSMC_MSG_PrepareForDriverUnload            0x34
-#define PPSMC_Message_Count                         0x35
+#define PPSMC_MSG_ReadThrottlerLimit                0x35
+#define PPSMC_MSG_QueryValidMcaCount                0x36
+#define PPSMC_MSG_McaBankDumpDW                     0x37
+#define PPSMC_MSG_GetCTFLimit                       0x38
+#define PPSMC_Message_Count                         0x39
 
 //PPSMC Reset Types for driver msg argument
 #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET        0x1
 #define PPSMC_RESET_TYPE_DRIVER_MODE_2_RESET         0x2
 #define PPSMC_RESET_TYPE_DRIVER_MODE_3_RESET        0x3
 
+//PPSMC Throttling Limit Types for driver msg argument
+#define PPSMC_THROTTLING_LIMIT_TYPE_SOCKET          0x1
+#define PPSMC_THROTTLING_LIMIT_TYPE_HBM             0x2
+
+//CTF/Throttle Limit types
+#define PPSMC_AID_THM_TYPE                          0x1
+#define PPSMC_CCD_THM_TYPE                          0x2
+#define PPSMC_XCD_THM_TYPE                          0x3
+#define PPSMC_HBM_THM_TYPE                          0x4
+
 typedef uint32_t PPSMC_Result;
 typedef uint32_t PPSMC_MSG;
 
index 297b70b..e57265c 100644 (file)
@@ -84,6 +84,7 @@
        __SMU_DUMMY_MAP(SetTjMax),                     \
        __SMU_DUMMY_MAP(SetFanTemperatureTarget),      \
        __SMU_DUMMY_MAP(PrepareMp1ForUnload),          \
+       __SMU_DUMMY_MAP(GetCTFLimit),                  \
        __SMU_DUMMY_MAP(DramLogSetDramAddrHigh),       \
        __SMU_DUMMY_MAP(DramLogSetDramAddrLow),        \
        __SMU_DUMMY_MAP(DramLogSetDramSize),           \
        __SMU_DUMMY_MAP(AllowGpo),      \
        __SMU_DUMMY_MAP(Mode2Reset),    \
        __SMU_DUMMY_MAP(RequestI2cTransaction), \
-       __SMU_DUMMY_MAP(GetMetricsTable),
+       __SMU_DUMMY_MAP(GetMetricsTable), \
+       __SMU_DUMMY_MAP(DALNotPresent),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
index f1282fc..0232adb 100644 (file)
@@ -837,12 +837,8 @@ int smu_v13_0_notify_display_change(struct smu_context *smu)
 {
        int ret = 0;
 
-       if (!smu->pm_enabled)
-               return ret;
-
-       if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) &&
-           smu->adev->gmc.vram_type == AMDGPU_VRAM_TYPE_HBM)
-               ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1, NULL);
+       if (!amdgpu_device_has_dc_support(smu->adev))
+               ret = smu_cmn_send_smc_msg(smu, SMU_MSG_DALNotPresent, NULL);
 
        return ret;
 }
index 8b7403b..3903a47 100644 (file)
@@ -162,6 +162,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
        MSG_MAP(AllowGpo,                       PPSMC_MSG_SetGpoAllow,           0),
        MSG_MAP(AllowIHHostInterrupt,           PPSMC_MSG_AllowIHHostInterrupt,       0),
        MSG_MAP(ReenableAcDcInterrupt,          PPSMC_MSG_ReenableAcDcInterrupt,       0),
+       MSG_MAP(DALNotPresent,          PPSMC_MSG_DALNotPresent,       0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -2687,6 +2688,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
        .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag,
        .gpo_control = smu_v13_0_gpo_control,
        .get_ecc_info = smu_v13_0_0_get_ecc_info,
+       .notify_display_change = smu_v13_0_notify_display_change,
 };
 
 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
index 6ed9cd0..199a673 100644 (file)
@@ -132,6 +132,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
        MSG_MAP(SetSoftMinGfxclk,                    PPSMC_MSG_SetSoftMinGfxClk,                0),
        MSG_MAP(SetSoftMaxGfxClk,                    PPSMC_MSG_SetSoftMaxGfxClk,                0),
        MSG_MAP(PrepareMp1ForUnload,                 PPSMC_MSG_PrepareForDriverUnload,          0),
+       MSG_MAP(GetCTFLimit,                         PPSMC_MSG_GetCTFLimit,                     0),
 };
 
 static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
@@ -2081,6 +2082,55 @@ out:
        return ret;
 }
 
+/**
+ * smu_v13_0_6_get_thermal_temperature_range - read the CTF temperature limits
+ * @smu: SMU context to query
+ * @range: temperature range whose critical limits are filled in
+ *
+ * Sends SMU_MSG_GetCTFLimit for the AID and XCD sensors (plus the CCD sensor
+ * on APUs) and stores the highest of them as @range->hotspot_crit_max, then
+ * queries the HBM limit for @range->mem_crit_max. Both values are scaled by
+ * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES. @range is only written once every
+ * query has succeeded.
+ *
+ * Return: 0 on success, or when nothing is queried (SR-IOV VF, or SMU
+ * firmware older than 0x554500); -EINVAL if @range is NULL; otherwise the
+ * error returned by the failing SMU call.
+ */
+static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu,
+                                                    struct smu_temperature_range *range)
+{
+       struct amdgpu_device *adev = smu->adev;
+       u32 aid_temp, xcd_temp, mem_temp;
+       uint32_t smu_version;
+       u32 ccd_temp = 0;
+       int ret;
+
+       if (amdgpu_sriov_vf(adev))
+               return 0;
+
+       if (!range)
+               return -EINVAL;
+
+       /*
+        * The GetCTFLimit message is only supported for smu version 85.69
+        * (0x554500) or higher. Bail out if the version cannot be read so
+        * the comparison never looks at an uninitialized smu_version.
+        */
+       ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
+       if (ret)
+               return ret;
+       if (smu_version < 0x554500)
+               return 0;
+
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+                                             PPSMC_AID_THM_TYPE, &aid_temp);
+       if (ret)
+               return ret;
+
+       /* ccd_temp stays 0 on non-APU parts, so it never wins the max3() below */
+       if (adev->flags & AMD_IS_APU) {
+               ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+                                                     PPSMC_CCD_THM_TYPE, &ccd_temp);
+               if (ret)
+                       return ret;
+       }
+
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+                                             PPSMC_XCD_THM_TYPE, &xcd_temp);
+       if (ret)
+               return ret;
+
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+                                             PPSMC_HBM_THM_TYPE, &mem_temp);
+       if (ret)
+               return ret;
+
+       /* All queries succeeded: publish both limits together */
+       range->hotspot_crit_max = max3(aid_temp, xcd_temp, ccd_temp) *
+                                      SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       range->mem_crit_max = mem_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+
+       return 0;
+}
+
 static int smu_v13_0_6_mode1_reset(struct smu_context *smu)
 {
        struct amdgpu_device *adev = smu->adev;
@@ -2108,8 +2158,7 @@ static int smu_v13_0_6_mode1_reset(struct smu_context *smu)
 
 static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu)
 {
-       /* TODO: Enable this when FW support is added */
-       return false;
+       return true;
 }
 
 static bool smu_v13_0_6_is_mode2_reset_supported(struct smu_context *smu)
@@ -2177,6 +2226,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
        .get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
        .set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
        .get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
+       .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
        .mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
        .mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported,
        .mode1_reset = smu_v13_0_6_mode1_reset,
index e99a6fa..a7e6775 100644 (file)
@@ -58,6 +58,7 @@ struct i915_perf_group;
 
 typedef u32 intel_engine_mask_t;
 #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
+#define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1)
 
 struct intel_hw_status_page {
        struct list_head timelines;
index a0e3ef1..b5b7f2f 100644 (file)
@@ -5470,6 +5470,9 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
 
        ve->base.flags = I915_ENGINE_IS_VIRTUAL;
 
+       BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
+       ve->base.mask = VIRTUAL_ENGINES;
+
        intel_context_init(&ve->context, &ve->base);
 
        for (n = 0; n < count; n++) {
index 4ec8530..094fca9 100644 (file)
 static bool enable_out_of_sync = false;
 static int preallocated_oos_pages = 8192;
 
-static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
-{
-       struct kvm *kvm = vgpu->vfio_device.kvm;
-       int idx;
-       bool ret;
-
-       if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
-               return false;
-
-       idx = srcu_read_lock(&kvm->srcu);
-       ret = kvm_is_visible_gfn(kvm, gfn);
-       srcu_read_unlock(&kvm->srcu, idx);
-
-       return ret;
-}
-
 /*
  * validate a gm address and related range size,
  * translate it to host gm address
@@ -1161,31 +1145,6 @@ static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
        ops->set_pfn(se, s->shadow_page.mfn);
 }
 
-/*
- * Check if can do 2M page
- * @vgpu: target vgpu
- * @entry: target pfn's gtt entry
- *
- * Return 1 if 2MB huge gtt shadowing is possible, 0 if miscondition,
- * negative if found err.
- */
-static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
-       struct intel_gvt_gtt_entry *entry)
-{
-       const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
-       kvm_pfn_t pfn;
-
-       if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
-               return 0;
-
-       if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
-               return -EINVAL;
-       pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
-       if (is_error_noslot_pfn(pfn))
-               return -EINVAL;
-       return PageTransHuge(pfn_to_page(pfn));
-}
-
 static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
        struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
        struct intel_gvt_gtt_entry *se)
@@ -1279,7 +1238,7 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
 {
        const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
        struct intel_gvt_gtt_entry se = *ge;
-       unsigned long gfn, page_size = PAGE_SIZE;
+       unsigned long gfn;
        dma_addr_t dma_addr;
        int ret;
 
@@ -1291,6 +1250,9 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
        switch (ge->type) {
        case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
                gvt_vdbg_mm("shadow 4K gtt entry\n");
+               ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
+               if (ret)
+                       return -ENXIO;
                break;
        case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
                gvt_vdbg_mm("shadow 64K gtt entry\n");
@@ -1302,25 +1264,20 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
                return split_64KB_gtt_entry(vgpu, spt, index, &se);
        case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
                gvt_vdbg_mm("shadow 2M gtt entry\n");
-               ret = is_2MB_gtt_possible(vgpu, ge);
-               if (ret == 0)
+               if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
+                   intel_gvt_dma_map_guest_page(vgpu, gfn,
+                                                I915_GTT_PAGE_SIZE_2M, &dma_addr))
                        return split_2MB_gtt_entry(vgpu, spt, index, &se);
-               else if (ret < 0)
-                       return ret;
-               page_size = I915_GTT_PAGE_SIZE_2M;
                break;
        case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
                gvt_vgpu_err("GVT doesn't support 1GB entry\n");
                return -EINVAL;
        default:
                GEM_BUG_ON(1);
+               return -EINVAL;
        }
 
-       /* direct shadow */
-       ret = intel_gvt_dma_map_guest_page(vgpu, gfn, page_size, &dma_addr);
-       if (ret)
-               return -ENXIO;
-
+       /* Successfully shadowed a 4K or 2M page (without splitting). */
        pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
        ppgtt_set_shadow_entry(spt, &se, index);
        return 0;
@@ -1329,11 +1286,9 @@ static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
 static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
 {
        struct intel_vgpu *vgpu = spt->vgpu;
-       struct intel_gvt *gvt = vgpu->gvt;
-       const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
        struct intel_vgpu_ppgtt_spt *s;
        struct intel_gvt_gtt_entry se, ge;
-       unsigned long gfn, i;
+       unsigned long i;
        int ret;
 
        trace_spt_change(spt->vgpu->id, "born", spt,
@@ -1350,13 +1305,6 @@ static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
                        ppgtt_generate_shadow_entry(&se, s, &ge);
                        ppgtt_set_shadow_entry(spt, &se, i);
                } else {
-                       gfn = ops->get_pfn(&ge);
-                       if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
-                               ops->set_pfn(&se, gvt->gtt.scratch_mfn);
-                               ppgtt_set_shadow_entry(spt, &se, i);
-                               continue;
-                       }
-
                        ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
                        if (ret)
                                goto fail;
@@ -1845,6 +1793,9 @@ static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
        if (mm->ppgtt_mm.shadowed)
                return 0;
 
+       if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+               return -EINVAL;
+
        mm->ppgtt_mm.shadowed = true;
 
        for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
@@ -2331,14 +2282,6 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
                m.val64 = e.val64;
                m.type = e.type;
 
-               /* one PTE update may be issued in multiple writes and the
-                * first write may not construct a valid gfn
-                */
-               if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
-                       ops->set_pfn(&m, gvt->gtt.scratch_mfn);
-                       goto out;
-               }
-
                ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
                                                   &dma_addr);
                if (ret) {
@@ -2355,7 +2298,6 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
                ops->clear_present(&m);
        }
 
-out:
        ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
 
        ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
@@ -2876,24 +2818,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
 }
 
 /**
- * intel_vgpu_reset_gtt - reset the all GTT related status
- * @vgpu: a vGPU
- *
- * This function is called from vfio core to reset reset all
- * GTT related status, including GGTT, PPGTT, scratch page.
- *
- */
-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
-{
-       /* Shadow pages are only created when there is no page
-        * table tracking data, so remove page tracking data after
-        * removing the shadow pages.
-        */
-       intel_vgpu_destroy_all_ppgtt_mm(vgpu);
-       intel_vgpu_reset_ggtt(vgpu, true);
-}
-
-/**
  * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
  * @gvt: intel gvt device
  *
index a3b0f59..4cb183e 100644 (file)
@@ -224,7 +224,6 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old);
 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
 
 int intel_gvt_init_gtt(struct intel_gvt *gvt);
-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
 void intel_gvt_clean_gtt(struct intel_gvt *gvt);
 
 struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
index 2d65800..53a0a42 100644 (file)
 #define _GVT_H_
 
 #include <uapi/linux/pci_regs.h>
-#include <linux/kvm_host.h>
 #include <linux/vfio.h>
 #include <linux/mdev.h>
 
+#include <asm/kvm_page_track.h>
+
 #include "i915_drv.h"
 #include "intel_gvt.h"
 
index 9cd9e9d..42ce20e 100644 (file)
@@ -106,12 +106,10 @@ struct gvt_dma {
 #define vfio_dev_to_vgpu(vfio_dev) \
        container_of((vfio_dev), struct intel_vgpu, vfio_device)
 
-static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-               const u8 *val, int len,
-               struct kvm_page_track_notifier_node *node);
-static void kvmgt_page_track_flush_slot(struct kvm *kvm,
-               struct kvm_memory_slot *slot,
-               struct kvm_page_track_notifier_node *node);
+static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
+                                  struct kvm_page_track_notifier_node *node);
+static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
+                                          struct kvm_page_track_notifier_node *node);
 
 static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
 {
@@ -161,8 +159,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
 
                if (npage == 0)
                        base_page = cur_page;
-               else if (base_page + npage != cur_page) {
-                       gvt_vgpu_err("The pages are not continuous\n");
+               else if (page_to_pfn(base_page) + npage != page_to_pfn(cur_page)) {
                        ret = -EINVAL;
                        npage++;
                        goto err;
@@ -172,7 +169,8 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
        *page = base_page;
        return 0;
 err:
-       gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
+       if (npage)
+               gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
        return ret;
 }
 
@@ -352,6 +350,8 @@ __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
 {
        struct kvmgt_pgfn *p, *res = NULL;
 
+       lockdep_assert_held(&info->vgpu_lock);
+
        hash_for_each_possible(info->ptable, p, hnode, gfn) {
                if (gfn == p->gfn) {
                        res = p;
@@ -654,21 +654,19 @@ out:
 static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 {
        struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
-
-       if (!vgpu->vfio_device.kvm ||
-           vgpu->vfio_device.kvm->mm != current->mm) {
-               gvt_vgpu_err("KVM is required to use Intel vGPU\n");
-               return -ESRCH;
-       }
+       int ret;
 
        if (__kvmgt_vgpu_exist(vgpu))
                return -EEXIST;
 
        vgpu->track_node.track_write = kvmgt_page_track_write;
-       vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
-       kvm_get_kvm(vgpu->vfio_device.kvm);
-       kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
-                                        &vgpu->track_node);
+       vgpu->track_node.track_remove_region = kvmgt_page_track_remove_region;
+       ret = kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+                                              &vgpu->track_node);
+       if (ret) {
+               gvt_vgpu_err("KVM is required to use Intel vGPU\n");
+               return ret;
+       }
 
        set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
 
@@ -703,7 +701,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 
        kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
                                           &vgpu->track_node);
-       kvm_put_kvm(vgpu->vfio_device.kvm);
 
        kvmgt_protect_table_destroy(vgpu);
        gvt_cache_destroy(vgpu);
@@ -1547,95 +1544,70 @@ static struct mdev_driver intel_vgpu_mdev_driver = {
 
 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 {
-       struct kvm *kvm = info->vfio_device.kvm;
-       struct kvm_memory_slot *slot;
-       int idx;
+       int r;
 
        if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
                return -ESRCH;
 
-       idx = srcu_read_lock(&kvm->srcu);
-       slot = gfn_to_memslot(kvm, gfn);
-       if (!slot) {
-               srcu_read_unlock(&kvm->srcu, idx);
-               return -EINVAL;
-       }
-
-       write_lock(&kvm->mmu_lock);
-
        if (kvmgt_gfn_is_write_protected(info, gfn))
-               goto out;
+               return 0;
 
-       kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
-       kvmgt_protect_table_add(info, gfn);
+       r = kvm_write_track_add_gfn(info->vfio_device.kvm, gfn);
+       if (r)
+               return r;
 
-out:
-       write_unlock(&kvm->mmu_lock);
-       srcu_read_unlock(&kvm->srcu, idx);
+       kvmgt_protect_table_add(info, gfn);
        return 0;
 }
 
 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
 {
-       struct kvm *kvm = info->vfio_device.kvm;
-       struct kvm_memory_slot *slot;
-       int idx;
+       int r;
 
        if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
                return -ESRCH;
 
-       idx = srcu_read_lock(&kvm->srcu);
-       slot = gfn_to_memslot(kvm, gfn);
-       if (!slot) {
-               srcu_read_unlock(&kvm->srcu, idx);
-               return -EINVAL;
-       }
-
-       write_lock(&kvm->mmu_lock);
-
        if (!kvmgt_gfn_is_write_protected(info, gfn))
-               goto out;
+               return 0;
 
-       kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
-       kvmgt_protect_table_del(info, gfn);
+       r = kvm_write_track_remove_gfn(info->vfio_device.kvm, gfn);
+       if (r)
+               return r;
 
-out:
-       write_unlock(&kvm->mmu_lock);
-       srcu_read_unlock(&kvm->srcu, idx);
+       kvmgt_protect_table_del(info, gfn);
        return 0;
 }
 
-static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-               const u8 *val, int len,
-               struct kvm_page_track_notifier_node *node)
+static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
+                                  struct kvm_page_track_notifier_node *node)
 {
        struct intel_vgpu *info =
                container_of(node, struct intel_vgpu, track_node);
 
-       if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
+       mutex_lock(&info->vgpu_lock);
+
+       if (kvmgt_gfn_is_write_protected(info, gpa >> PAGE_SHIFT))
                intel_vgpu_page_track_handler(info, gpa,
                                                     (void *)val, len);
+
+       mutex_unlock(&info->vgpu_lock);
 }
 
-static void kvmgt_page_track_flush_slot(struct kvm *kvm,
-               struct kvm_memory_slot *slot,
-               struct kvm_page_track_notifier_node *node)
+static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
+                                          struct kvm_page_track_notifier_node *node)
 {
-       int i;
-       gfn_t gfn;
+       unsigned long i;
        struct intel_vgpu *info =
                container_of(node, struct intel_vgpu, track_node);
 
-       write_lock(&kvm->mmu_lock);
-       for (i = 0; i < slot->npages; i++) {
-               gfn = slot->base_gfn + i;
-               if (kvmgt_gfn_is_write_protected(info, gfn)) {
-                       kvm_slot_page_track_remove_page(kvm, slot, gfn,
-                                               KVM_PAGE_TRACK_WRITE);
-                       kvmgt_protect_table_del(info, gfn);
-               }
+       mutex_lock(&info->vgpu_lock);
+
+       for (i = 0; i < nr_pages; i++) {
+               if (kvmgt_gfn_is_write_protected(info, gfn + i))
+                       kvmgt_protect_table_del(info, gfn + i);
        }
-       write_unlock(&kvm->mmu_lock);
+
+       mutex_unlock(&info->vgpu_lock);
 }
 
 void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
index df34e73..60a6543 100644 (file)
@@ -162,13 +162,9 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
        struct intel_vgpu_page_track *page_track;
        int ret = 0;
 
-       mutex_lock(&vgpu->vgpu_lock);
-
        page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT);
-       if (!page_track) {
-               ret = -ENXIO;
-               goto out;
-       }
+       if (!page_track)
+               return -ENXIO;
 
        if (unlikely(vgpu->failsafe)) {
                 /* Remove write protection to prevent future traps. */
@@ -179,7 +175,5 @@ int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
                        gvt_err("guest page write error, gpa %llx\n", gpa);
        }
 
-out:
-       mutex_unlock(&vgpu->vgpu_lock);
        return ret;
 }
index 7c7da28..f590810 100644 (file)
@@ -134,9 +134,7 @@ static void i915_fence_release(struct dma_fence *fence)
        i915_sw_fence_fini(&rq->semaphore);
 
        /*
-        * Keep one request on each engine for reserved use under mempressure
-        * do not use with virtual engines as this really is only needed for
-        * kernel contexts.
+        * Keep one request on each engine for reserved use under mempressure.
         *
         * We do not hold a reference to the engine here and so have to be
         * very careful in what rq->engine we poke. The virtual engine is
@@ -166,8 +164,7 @@ static void i915_fence_release(struct dma_fence *fence)
         * know that if the rq->execution_mask is a single bit, rq->engine
         * can be a physical engine with the exact corresponding mask.
         */
-       if (!intel_engine_is_virtual(rq->engine) &&
-           is_power_of_2(rq->execution_mask) &&
+       if (is_power_of_2(rq->execution_mask) &&
            !cmpxchg(&rq->engine->request_pool, NULL, rq))
                return;
 
index a349245..a34917b 100644 (file)
@@ -1122,18 +1122,11 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
        PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
        PUSH_KICK(push);
 
-       ret = nouveau_fence_new(pfence);
+       ret = nouveau_fence_new(pfence, chan);
        if (ret)
                goto fail;
 
-       ret = nouveau_fence_emit(*pfence, chan);
-       if (ret)
-               goto fail_fence_unref;
-
        return 0;
-
-fail_fence_unref:
-       nouveau_fence_unref(pfence);
 fail:
        spin_lock_irqsave(&dev->event_lock, flags);
        list_del(&s->head);
index 19cab37..0f3bd18 100644 (file)
@@ -875,16 +875,10 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
        if (ret)
                goto out_unlock;
 
-       ret = nouveau_fence_new(&fence);
+       ret = nouveau_fence_new(&fence, chan);
        if (ret)
                goto out_unlock;
 
-       ret = nouveau_fence_emit(fence, chan);
-       if (ret) {
-               nouveau_fence_unref(&fence);
-               goto out_unlock;
-       }
-
        /* TODO: figure out a better solution here
         *
         * wait on the fence here explicitly as going through
index 1fd5ccf..bb3d6e5 100644 (file)
@@ -70,11 +70,9 @@ nouveau_channel_idle(struct nouveau_channel *chan)
                struct nouveau_fence *fence = NULL;
                int ret;
 
-               ret = nouveau_fence_new(&fence);
+               ret = nouveau_fence_new(&fence, chan);
                if (!ret) {
-                       ret = nouveau_fence_emit(fence, chan);
-                       if (!ret)
-                               ret = nouveau_fence_wait(fence, false, false);
+                       ret = nouveau_fence_wait(fence, false, false);
                        nouveau_fence_unref(&fence);
                }
 
index 61e8456..12feecf 100644 (file)
@@ -209,8 +209,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
                goto done;
        }
 
-       if (!nouveau_fence_new(&fence))
-               nouveau_fence_emit(fence, dmem->migrate.chan);
+       nouveau_fence_new(&fence, dmem->migrate.chan);
        migrate_vma_pages(&args);
        nouveau_dmem_fence_done(&fence);
        dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -403,8 +402,7 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
                }
        }
 
-       if (!nouveau_fence_new(&fence))
-               nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
+       nouveau_fence_new(&fence, chunk->drm->dmem->migrate.chan);
        migrate_device_pages(src_pfns, dst_pfns, npages);
        nouveau_dmem_fence_done(&fence);
        migrate_device_finalize(src_pfns, dst_pfns, npages);
@@ -677,8 +675,7 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
                addr += PAGE_SIZE;
        }
 
-       if (!nouveau_fence_new(&fence))
-               nouveau_fence_emit(fence, drm->dmem->migrate.chan);
+       nouveau_fence_new(&fence, drm->dmem->migrate.chan);
        migrate_vma_pages(args);
        nouveau_dmem_fence_done(&fence);
        nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
index a90c4cd..19024ce 100644 (file)
@@ -96,7 +96,8 @@ nouveau_exec_job_submit(struct nouveau_job *job)
        unsigned long index;
        int ret;
 
-       ret = nouveau_fence_new(&exec_job->fence);
+       /* Create a new fence, but do not emit yet. */
+       ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
        if (ret)
                return ret;
 
@@ -170,13 +171,17 @@ nouveau_exec_job_run(struct nouveau_job *job)
                nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
        }
 
-       ret = nouveau_fence_emit(fence, chan);
+       ret = nouveau_fence_emit(fence);
        if (ret) {
+               nouveau_fence_unref(&exec_job->fence);
                NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
                WIND_RING(chan);
                return ERR_PTR(ret);
        }
 
+       /* The fence was emitted successfully, set the job's fence pointer to
+        * NULL in order to avoid freeing it up when the job is cleaned up.
+        */
        exec_job->fence = NULL;
 
        return &fence->base;
@@ -189,7 +194,7 @@ nouveau_exec_job_free(struct nouveau_job *job)
 
        nouveau_job_free(job);
 
-       nouveau_fence_unref(&exec_job->fence);
+       kfree(exec_job->fence);
        kfree(exec_job->push.s);
        kfree(exec_job);
 }
index 77c739a..61d9e70 100644 (file)
@@ -205,16 +205,13 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 }
 
 int
-nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
+nouveau_fence_emit(struct nouveau_fence *fence)
 {
+       struct nouveau_channel *chan = fence->channel;
        struct nouveau_fence_chan *fctx = chan->fence;
        struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
        int ret;
 
-       if (unlikely(!chan->fence))
-               return -ENODEV;
-
-       fence->channel  = chan;
        fence->timeout  = jiffies + (15 * HZ);
 
        if (priv->uevent)
@@ -406,18 +403,41 @@ nouveau_fence_unref(struct nouveau_fence **pfence)
 }
 
 int
-nouveau_fence_new(struct nouveau_fence **pfence)
+nouveau_fence_create(struct nouveau_fence **pfence,
+                    struct nouveau_channel *chan)
 {
        struct nouveau_fence *fence;
 
+       if (unlikely(!chan->fence))
+               return -ENODEV;
+
        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
        if (!fence)
                return -ENOMEM;
 
+       fence->channel = chan;
+
        *pfence = fence;
        return 0;
 }
 
+int
+nouveau_fence_new(struct nouveau_fence **pfence,
+                 struct nouveau_channel *chan)
+{
+       int ret = 0;
+
+       ret = nouveau_fence_create(pfence, chan);
+       if (ret)
+               return ret;
+
+       ret = nouveau_fence_emit(*pfence);
+       if (ret)
+               nouveau_fence_unref(pfence);
+
+       return ret;
+}
+
 static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
 {
        return "nouveau";
index 2c72d96..64d33ae 100644 (file)
@@ -17,10 +17,11 @@ struct nouveau_fence {
        unsigned long timeout;
 };
 
-int  nouveau_fence_new(struct nouveau_fence **);
+int  nouveau_fence_create(struct nouveau_fence **, struct nouveau_channel *);
+int  nouveau_fence_new(struct nouveau_fence **, struct nouveau_channel *);
 void nouveau_fence_unref(struct nouveau_fence **);
 
-int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
+int  nouveau_fence_emit(struct nouveau_fence *);
 bool nouveau_fence_done(struct nouveau_fence *);
 int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
 int  nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
index c0b10d8..a0d303e 100644 (file)
@@ -914,11 +914,8 @@ revalidate:
                }
        }
 
-       ret = nouveau_fence_new(&fence);
-       if (!ret)
-               ret = nouveau_fence_emit(fence, chan);
+       ret = nouveau_fence_new(&fence, chan);
        if (ret) {
-               nouveau_fence_unref(&fence);
                NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
                WIND_RING(chan);
                goto out;
index dfe8295..a9fd9ca 100644 (file)
@@ -145,7 +145,7 @@ runtime_err:
        return ret;
 }
 
-static int omap_hwspinlock_remove(struct platform_device *pdev)
+static void omap_hwspinlock_remove(struct platform_device *pdev)
 {
        struct hwspinlock_device *bank = platform_get_drvdata(pdev);
        int ret;
@@ -153,12 +153,10 @@ static int omap_hwspinlock_remove(struct platform_device *pdev)
        ret = hwspin_lock_unregister(bank);
        if (ret) {
                dev_err(&pdev->dev, "%s failed: %d\n", __func__, ret);
-               return ret;
+               return;
        }
 
        pm_runtime_disable(&pdev->dev);
-
-       return 0;
 }
 
 static const struct of_device_id omap_hwspinlock_of_match[] = {
@@ -171,7 +169,7 @@ MODULE_DEVICE_TABLE(of, omap_hwspinlock_of_match);
 
 static struct platform_driver omap_hwspinlock_driver = {
        .probe          = omap_hwspinlock_probe,
-       .remove         = omap_hwspinlock_remove,
+       .remove_new     = omap_hwspinlock_remove,
        .driver         = {
                .name   = "omap_hwspinlock",
                .of_match_table = omap_hwspinlock_of_match,
index 9cf1863..a0fd67f 100644 (file)
@@ -69,9 +69,18 @@ static const struct hwspinlock_ops qcom_hwspinlock_ops = {
        .unlock         = qcom_hwspinlock_unlock,
 };
 
+static const struct regmap_config sfpb_mutex_config = {
+       .reg_bits               = 32,
+       .reg_stride             = 4,
+       .val_bits               = 32,
+       .max_register           = 0x100,
+       .fast_io                = true,
+};
+
 static const struct qcom_hwspinlock_of_data of_sfpb_mutex = {
        .offset = 0x4,
        .stride = 0x4,
+       .regmap_config = &sfpb_mutex_config,
 };
 
 static const struct regmap_config tcsr_msm8226_mutex_config = {
@@ -197,6 +206,8 @@ static int qcom_hwspinlock_probe(struct platform_device *pdev)
 
                bank->lock[i].priv = devm_regmap_field_alloc(&pdev->dev,
                                                             regmap, field);
+               if (IS_ERR(bank->lock[i].priv))
+                       return PTR_ERR(bank->lock[i].priv);
        }
 
        return devm_hwspin_lock_register(&pdev->dev, bank, &qcom_hwspinlock_ops,
index 67845c0..1edca10 100644 (file)
@@ -120,20 +120,18 @@ static int u8500_hsem_probe(struct platform_device *pdev)
                                         pdata->base_id, num_locks);
 }
 
-static int u8500_hsem_remove(struct platform_device *pdev)
+static void u8500_hsem_remove(struct platform_device *pdev)
 {
        struct hwspinlock_device *bank = platform_get_drvdata(pdev);
        void __iomem *io_base = bank->lock[0].priv - HSEM_REGISTER_OFFSET;
 
        /* clear all interrupts */
        writel(0xFFFF, io_base + HSEM_ICRALL);
-
-       return 0;
 }
 
 static struct platform_driver u8500_hsem_driver = {
        .probe          = u8500_hsem_probe,
-       .remove         = u8500_hsem_remove,
+       .remove_new     = u8500_hsem_remove,
        .driver         = {
                .name   = "u8500_hsem",
        },
index 9cfe8fc..169607e 100644 (file)
@@ -1384,7 +1384,7 @@ config I2C_ICY
 
 config I2C_MLXCPLD
        tristate "Mellanox I2C driver"
-       depends on X86_64 || COMPILE_TEST
+       depends on X86_64 || ARM64 || COMPILE_TEST
        help
          This exposes the Mellanox platform I2C busses to the linux I2C layer
          for X86 and ARM64 based systems.
index cc58fea..0231c5b 100644 (file)
@@ -165,14 +165,15 @@ static int ali15x3_setup(struct pci_dev *ALI15X3_dev)
        }
 
        if(force_addr) {
+               int ret;
+
                dev_info(&ALI15X3_dev->dev, "forcing ISA address 0x%04X\n",
                        ali15x3_smba);
-               if (PCIBIOS_SUCCESSFUL != pci_write_config_word(ALI15X3_dev,
-                                                               SMBBA,
-                                                               ali15x3_smba))
+               ret = pci_write_config_word(ALI15X3_dev, SMBBA, ali15x3_smba);
+               if (ret != PCIBIOS_SUCCESSFUL)
                        goto error;
-               if (PCIBIOS_SUCCESSFUL != pci_read_config_word(ALI15X3_dev,
-                                                               SMBBA, &a))
+               ret = pci_read_config_word(ALI15X3_dev, SMBBA, &a);
+               if (ret != PCIBIOS_SUCCESSFUL)
                        goto error;
                if ((a & ~(ALI15X3_SMB_IOSIZE - 1)) != ali15x3_smba) {
                        /* make sure it works */
index 05ad3bc..db45554 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/pinctrl/consumer.h>
@@ -207,19 +206,15 @@ static int at91_twi_probe(struct platform_device *pdev)
 
        dev->dev = &pdev->dev;
 
-       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!mem)
-               return -ENODEV;
+       dev->base = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
+       if (IS_ERR(dev->base))
+               return PTR_ERR(dev->base);
        phy_addr = mem->start;
 
        dev->pdata = at91_twi_get_driver_data(pdev);
        if (!dev->pdata)
                return -ENODEV;
 
-       dev->base = devm_ioremap_resource(&pdev->dev, mem);
-       if (IS_ERR(dev->base))
-               return PTR_ERR(dev->base);
-
        dev->irq = platform_get_irq(pdev, 0);
        if (dev->irq < 0)
                return dev->irq;
@@ -227,10 +222,9 @@ static int at91_twi_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, dev);
 
        dev->clk = devm_clk_get(dev->dev, NULL);
-       if (IS_ERR(dev->clk)) {
-               dev_err(dev->dev, "no clock defined\n");
-               return -ENODEV;
-       }
+       if (IS_ERR(dev->clk))
+               return dev_err_probe(dev->dev, PTR_ERR(dev->clk), "no clock defined\n");
+
        clk_prepare_enable(dev->clk);
 
        snprintf(dev->adapter.name, sizeof(dev->adapter.name), "AT91");
index c0c3578..d311981 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -832,7 +831,11 @@ static int at91_init_twi_recovery_gpio(struct platform_device *pdev,
        struct i2c_bus_recovery_info *rinfo = &dev->rinfo;
 
        rinfo->pinctrl = devm_pinctrl_get(&pdev->dev);
-       if (!rinfo->pinctrl || IS_ERR(rinfo->pinctrl)) {
+       if (!rinfo->pinctrl) {
+               dev_info(dev->dev, "pinctrl unavailable, bus recovery not supported\n");
+               return 0;
+       }
+       if (IS_ERR(rinfo->pinctrl)) {
                dev_info(dev->dev, "can't get pinctrl, bus recovery not supported\n");
                return PTR_ERR(rinfo->pinctrl);
        }
index e66c12e..8e43f25 100644 (file)
@@ -342,7 +342,6 @@ static void i2c_au1550_remove(struct platform_device *pdev)
        i2c_au1550_disable(priv);
 }
 
-#ifdef CONFIG_PM
 static int i2c_au1550_suspend(struct device *dev)
 {
        struct i2c_au1550_data *priv = dev_get_drvdata(dev);
@@ -361,21 +360,13 @@ static int i2c_au1550_resume(struct device *dev)
        return 0;
 }
 
-static const struct dev_pm_ops i2c_au1550_pmops = {
-       .suspend        = i2c_au1550_suspend,
-       .resume         = i2c_au1550_resume,
-};
-
-#define AU1XPSC_SMBUS_PMOPS (&i2c_au1550_pmops)
-
-#else
-#define AU1XPSC_SMBUS_PMOPS NULL
-#endif
+static DEFINE_SIMPLE_DEV_PM_OPS(i2c_au1550_pmops,
+                               i2c_au1550_suspend, i2c_au1550_resume);
 
 static struct platform_driver au1xpsc_smbus_driver = {
        .driver = {
                .name   = "au1xpsc_smbus",
-               .pm     = AU1XPSC_SMBUS_PMOPS,
+               .pm     = pm_sleep_ptr(&i2c_au1550_pmops),
        },
        .probe          = i2c_au1550_probe,
        .remove_new     = i2c_au1550_remove,
index 05c8068..51aab66 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -1029,7 +1029,6 @@ static int bcm_iproc_i2c_probe(struct platform_device *pdev)
        int irq, ret = 0;
        struct bcm_iproc_i2c_dev *iproc_i2c;
        struct i2c_adapter *adap;
-       struct resource *res;
 
        iproc_i2c = devm_kzalloc(&pdev->dev, sizeof(*iproc_i2c),
                                 GFP_KERNEL);
@@ -1042,15 +1041,12 @@ static int bcm_iproc_i2c_probe(struct platform_device *pdev)
                (enum bcm_iproc_i2c_type)of_device_get_match_data(&pdev->dev);
        init_completion(&iproc_i2c->done);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       iproc_i2c->base = devm_ioremap_resource(iproc_i2c->device, res);
+       iproc_i2c->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(iproc_i2c->base))
                return PTR_ERR(iproc_i2c->base);
 
        if (iproc_i2c->type == IPROC_I2C_NIC) {
-               res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-               iproc_i2c->idm_base = devm_ioremap_resource(iproc_i2c->device,
-                                                           res);
+               iproc_i2c->idm_base = devm_platform_ioremap_resource(pdev, 1);
                if (IS_ERR(iproc_i2c->idm_base))
                        return PTR_ERR(iproc_i2c->idm_base);
 
@@ -1128,8 +1124,6 @@ static void bcm_iproc_i2c_remove(struct platform_device *pdev)
        bcm_iproc_i2c_enable_disable(iproc_i2c, false);
 }
 
-#ifdef CONFIG_PM_SLEEP
-
 static int bcm_iproc_i2c_suspend(struct device *dev)
 {
        struct bcm_iproc_i2c_dev *iproc_i2c = dev_get_drvdata(dev);
@@ -1180,12 +1174,6 @@ static const struct dev_pm_ops bcm_iproc_i2c_pm_ops = {
        .resume_early = &bcm_iproc_i2c_resume
 };
 
-#define BCM_IPROC_I2C_PM_OPS (&bcm_iproc_i2c_pm_ops)
-#else
-#define BCM_IPROC_I2C_PM_OPS NULL
-#endif /* CONFIG_PM_SLEEP */
-
-
 static int bcm_iproc_i2c_reg_slave(struct i2c_client *slave)
 {
        struct bcm_iproc_i2c_dev *iproc_i2c = i2c_get_adapdata(slave->adapter);
@@ -1258,7 +1246,7 @@ static struct platform_driver bcm_iproc_i2c_driver = {
        .driver = {
                .name = "bcm-iproc-i2c",
                .of_match_table = bcm_iproc_i2c_of_match,
-               .pm = BCM_IPROC_I2C_PM_OPS,
+               .pm = pm_sleep_ptr(&bcm_iproc_i2c_pm_ops),
        },
        .probe = bcm_iproc_i2c_probe,
        .remove_new = bcm_iproc_i2c_remove,
index 8ce6d3f..b92de19 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -430,10 +430,9 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
 
        i2c_dev->bus_clk = bcm2835_i2c_register_div(&pdev->dev, mclk, i2c_dev);
 
-       if (IS_ERR(i2c_dev->bus_clk)) {
-               dev_err(&pdev->dev, "Could not register clock\n");
-               return PTR_ERR(i2c_dev->bus_clk);
-       }
+       if (IS_ERR(i2c_dev->bus_clk))
+               return dev_err_probe(&pdev->dev, PTR_ERR(i2c_dev->bus_clk),
+                                    "Could not register clock\n");
 
        ret = of_property_read_u32(pdev->dev.of_node, "clock-frequency",
                                   &bus_clk_rate);
@@ -444,10 +443,9 @@ static int bcm2835_i2c_probe(struct platform_device *pdev)
        }
 
        ret = clk_set_rate_exclusive(i2c_dev->bus_clk, bus_clk_rate);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "Could not set clock frequency\n");
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(&pdev->dev, ret,
+                                    "Could not set clock frequency\n");
 
        ret = clk_prepare_enable(i2c_dev->bus_clk);
        if (ret) {
index cf92cbc..acee767 100644 (file)
@@ -594,11 +594,10 @@ static int bcm2711_release_bsc(struct brcmstb_i2c_dev *dev)
 
 static int brcmstb_i2c_probe(struct platform_device *pdev)
 {
-       int rc = 0;
        struct brcmstb_i2c_dev *dev;
        struct i2c_adapter *adap;
-       struct resource *iomem;
        const char *int_name;
+       int rc;
 
        /* Allocate memory for private data structure */
        dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
@@ -614,18 +613,15 @@ static int brcmstb_i2c_probe(struct platform_device *pdev)
        init_completion(&dev->done);
 
        /* Map hardware registers */
-       iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       dev->base = devm_ioremap_resource(dev->device, iomem);
-       if (IS_ERR(dev->base)) {
-               rc = -ENOMEM;
-               goto probe_errorout;
-       }
+       dev->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(dev->base))
+               return PTR_ERR(dev->base);
 
        if (of_device_is_compatible(dev->device->of_node,
                                    "brcm,bcm2711-hdmi-i2c")) {
                rc = bcm2711_release_bsc(dev);
                if (rc)
-                       goto probe_errorout;
+                       return rc;
        }
 
        rc = of_property_read_string(dev->device->of_node, "interrupt-names",
@@ -678,16 +674,13 @@ static int brcmstb_i2c_probe(struct platform_device *pdev)
        adap->dev.of_node = pdev->dev.of_node;
        rc = i2c_add_adapter(adap);
        if (rc)
-               goto probe_errorout;
+               return rc;
 
        dev_info(dev->device, "%s@%dhz registered in %s mode\n",
                 int_name ? int_name : " ", dev->clk_freq_hz,
                 (dev->irq >= 0) ? "interrupt" : "polling");
 
        return 0;
-
-probe_errorout:
-       return rc;
 }
 
 static void brcmstb_i2c_remove(struct platform_device *pdev)
@@ -697,7 +690,6 @@ static void brcmstb_i2c_remove(struct platform_device *pdev)
        i2c_del_adapter(&dev->adapter);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int brcmstb_i2c_suspend(struct device *dev)
 {
        struct brcmstb_i2c_dev *i2c_dev = dev_get_drvdata(dev);
@@ -715,10 +707,9 @@ static int brcmstb_i2c_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
-static SIMPLE_DEV_PM_OPS(brcmstb_i2c_pm, brcmstb_i2c_suspend,
-                        brcmstb_i2c_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(brcmstb_i2c_pm, brcmstb_i2c_suspend,
+                               brcmstb_i2c_resume);
 
 static const struct of_device_id brcmstb_i2c_of_match[] = {
        {.compatible = "brcm,brcmstb-i2c"},
@@ -732,7 +723,7 @@ static struct platform_driver brcmstb_i2c_driver = {
        .driver = {
                   .name = "brcmstb-i2c",
                   .of_match_table = brcmstb_i2c_of_match,
-                  .pm = &brcmstb_i2c_pm,
+                  .pm = pm_sleep_ptr(&brcmstb_i2c_pm),
                   },
        .probe = brcmstb_i2c_probe,
        .remove_new = brcmstb_i2c_remove,
index 732daf6..9a664ab 100644 (file)
 #include <linux/i2c.h>
 #include <linux/io.h>
 #include <linux/dma-mapping.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <sysdev/fsl_soc.h>
 #include <asm/cpm.h>
 
index 71b6077..02b3b11 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/slab.h>
 #include <linux/cpufreq.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_data/i2c-davinci.h>
 #include <linux/pm_runtime.h>
 
@@ -765,7 +765,7 @@ static int davinci_i2c_probe(struct platform_device *pdev)
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
-               return dev_err_probe(&pdev->dev, irq, "can't get irq resource\n");
+               return irq;
 
        dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
        if (!dev)
@@ -902,7 +902,6 @@ static void davinci_i2c_remove(struct platform_device *pdev)
        pm_runtime_disable(dev->dev);
 }
 
-#ifdef CONFIG_PM
 static int davinci_i2c_suspend(struct device *dev)
 {
        struct davinci_i2c_dev *i2c_dev = dev_get_drvdata(dev);
@@ -926,15 +925,10 @@ static int davinci_i2c_resume(struct device *dev)
 static const struct dev_pm_ops davinci_i2c_pm = {
        .suspend        = davinci_i2c_suspend,
        .resume         = davinci_i2c_resume,
-       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-                                     pm_runtime_force_resume)
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+                                 pm_runtime_force_resume)
 };
 
-#define davinci_i2c_pm_ops (&davinci_i2c_pm)
-#else
-#define davinci_i2c_pm_ops NULL
-#endif
-
 static const struct platform_device_id davinci_i2c_driver_ids[] = {
        { .name = "i2c_davinci", },
        { /* sentinel */ }
@@ -947,7 +941,7 @@ static struct platform_driver davinci_i2c_driver = {
        .id_table       = davinci_i2c_driver_ids,
        .driver         = {
                .name   = "i2c_davinci",
-               .pm     = davinci_i2c_pm_ops,
+               .pm     = pm_sleep_ptr(&davinci_i2c_pm),
                .of_match_table = davinci_i2c_of_match,
        },
 };
index 24bef00..ca1035e 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
@@ -917,6 +918,17 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev)
                return PTR_ERR(gpio);
        rinfo->sda_gpiod = gpio;
 
+       rinfo->pinctrl = devm_pinctrl_get(dev->dev);
+       if (IS_ERR(rinfo->pinctrl)) {
+               if (PTR_ERR(rinfo->pinctrl) == -EPROBE_DEFER)
+                       return PTR_ERR(rinfo->pinctrl);
+
+               rinfo->pinctrl = NULL;
+               dev_err(dev->dev, "getting pinctrl info failed: bus recovery might not work\n");
+       } else if (!rinfo->pinctrl) {
+               dev_dbg(dev->dev, "pinctrl is disabled, bus recovery might not work\n");
+       }
+
        rinfo->recover_bus = i2c_generic_scl_recovery;
        rinfo->prepare_recovery = i2c_dw_prepare_recovery;
        rinfo->unprepare_recovery = i2c_dw_unprepare_recovery;
index 970c1c3..855b698 100644 (file)
@@ -418,7 +418,6 @@ static void dw_i2c_plat_remove(struct platform_device *pdev)
        reset_control_assert(dev->rst);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int dw_i2c_plat_prepare(struct device *dev)
 {
        /*
@@ -429,11 +428,7 @@ static int dw_i2c_plat_prepare(struct device *dev)
         */
        return !has_acpi_companion(dev);
 }
-#else
-#define dw_i2c_plat_prepare    NULL
-#endif
 
-#ifdef CONFIG_PM
 static int dw_i2c_plat_runtime_suspend(struct device *dev)
 {
        struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
@@ -447,7 +442,7 @@ static int dw_i2c_plat_runtime_suspend(struct device *dev)
        return 0;
 }
 
-static int __maybe_unused dw_i2c_plat_suspend(struct device *dev)
+static int dw_i2c_plat_suspend(struct device *dev)
 {
        struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
 
@@ -468,7 +463,7 @@ static int dw_i2c_plat_runtime_resume(struct device *dev)
        return 0;
 }
 
-static int __maybe_unused dw_i2c_plat_resume(struct device *dev)
+static int dw_i2c_plat_resume(struct device *dev)
 {
        struct dw_i2c_dev *i_dev = dev_get_drvdata(dev);
 
@@ -479,16 +474,11 @@ static int __maybe_unused dw_i2c_plat_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops dw_i2c_dev_pm_ops = {
-       .prepare = dw_i2c_plat_prepare,
-       SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume)
-       SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, dw_i2c_plat_runtime_resume, NULL)
+       .prepare = pm_sleep_ptr(dw_i2c_plat_prepare),
+       LATE_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume)
+       RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, dw_i2c_plat_runtime_resume, NULL)
 };
 
-#define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops)
-#else
-#define DW_I2C_DEV_PMOPS NULL
-#endif
-
 /* Work with hotplug and coldplug */
 MODULE_ALIAS("platform:i2c_designware");
 
@@ -499,7 +489,7 @@ static struct platform_driver dw_i2c_driver = {
                .name   = "i2c_designware",
                .of_match_table = of_match_ptr(dw_i2c_of_match),
                .acpi_match_table = ACPI_PTR(dw_i2c_acpi_match),
-               .pm     = DW_I2C_DEV_PMOPS,
+               .pm     = pm_ptr(&dw_i2c_dev_pm_ops),
        },
 };
 
index 4f02cc2..631109c 100644 (file)
@@ -218,10 +218,8 @@ static int dln2_i2c_probe(struct platform_device *pdev)
 
        /* initialize the i2c interface */
        ret = dln2_i2c_enable(dln2, true);
-       if (ret < 0) {
-               dev_err(dev, "failed to initialize adapter: %d\n", ret);
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "failed to initialize adapter\n");
 
        /* and finally attach to i2c layer */
        ret = i2c_add_adapter(&dln2->adapter);
index 4ba93cd..5574094 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
 
index f378cd4..2b0b9cd 100644 (file)
@@ -18,9 +18,7 @@
 #include <linux/clk.h>
 #include <linux/slab.h>
 #include <linux/io.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/spinlock.h>
 
 /*
@@ -892,7 +890,6 @@ static void exynos5_i2c_remove(struct platform_device *pdev)
        clk_unprepare(i2c->pclk);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int exynos5_i2c_suspend_noirq(struct device *dev)
 {
        struct exynos5_i2c *i2c = dev_get_drvdata(dev);
@@ -934,11 +931,10 @@ err_pclk:
        clk_disable_unprepare(i2c->pclk);
        return ret;
 }
-#endif
 
 static const struct dev_pm_ops exynos5_i2c_dev_pm_ops = {
-       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(exynos5_i2c_suspend_noirq,
-                                     exynos5_i2c_resume_noirq)
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(exynos5_i2c_suspend_noirq,
+                                 exynos5_i2c_resume_noirq)
 };
 
 static struct platform_driver exynos5_i2c_driver = {
@@ -946,7 +942,7 @@ static struct platform_driver exynos5_i2c_driver = {
        .remove_new     = exynos5_i2c_remove,
        .driver         = {
                .name   = "exynos5-hsi2c",
-               .pm     = &exynos5_i2c_dev_pm_ops,
+               .pm     = pm_sleep_ptr(&exynos5_i2c_dev_pm_ops),
                .of_match_table = exynos5_i2c_match,
        },
 };
index 70b0de0..efafc05 100644 (file)
@@ -4,8 +4,9 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
 
index 0980c77..dfad5ba 100644 (file)
@@ -470,18 +470,14 @@ static int hisi_i2c_probe(struct platform_device *pdev)
        hisi_i2c_disable_int(ctlr, HISI_I2C_INT_ALL);
 
        ret = devm_request_irq(dev, ctlr->irq, hisi_i2c_irq, 0, "hisi-i2c", ctlr);
-       if (ret) {
-               dev_err(dev, "failed to request irq handler, ret = %d\n", ret);
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to request irq handler\n");
 
        ctlr->clk = devm_clk_get_optional_enabled(&pdev->dev, NULL);
        if (IS_ERR_OR_NULL(ctlr->clk)) {
                ret = device_property_read_u64(dev, "clk_rate", &clk_rate_hz);
-               if (ret) {
-                       dev_err(dev, "failed to get clock frequency, ret = %d\n", ret);
-                       return ret;
-               }
+               if (ret)
+                       return dev_err_probe(dev, ret, "failed to get clock frequency\n");
        } else {
                clk_rate_hz = clk_get_rate(ctlr->clk);
        }
index 784a5f5..8e75515 100644 (file)
@@ -475,7 +475,6 @@ static void hix5hd2_i2c_remove(struct platform_device *pdev)
        pm_runtime_set_suspended(priv->dev);
 }
 
-#ifdef CONFIG_PM
 static int hix5hd2_i2c_runtime_suspend(struct device *dev)
 {
        struct hix5hd2_i2c_priv *priv = dev_get_drvdata(dev);
@@ -494,12 +493,11 @@ static int hix5hd2_i2c_runtime_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
 static const struct dev_pm_ops hix5hd2_i2c_pm_ops = {
-       SET_RUNTIME_PM_OPS(hix5hd2_i2c_runtime_suspend,
-                             hix5hd2_i2c_runtime_resume,
-                             NULL)
+       RUNTIME_PM_OPS(hix5hd2_i2c_runtime_suspend,
+                      hix5hd2_i2c_runtime_resume,
+                      NULL)
 };
 
 static const struct of_device_id hix5hd2_i2c_match[] = {
@@ -513,7 +511,7 @@ static struct platform_driver hix5hd2_i2c_driver = {
        .remove_new     = hix5hd2_i2c_remove,
        .driver         = {
                .name   = "hix5hd2-i2c",
-               .pm     = &hix5hd2_i2c_pm_ops,
+               .pm     = pm_ptr(&hix5hd2_i2c_pm_ops),
                .of_match_table = hix5hd2_i2c_match,
        },
 };
index 943b8e6..73ae064 100644 (file)
@@ -1808,7 +1808,6 @@ static void i801_shutdown(struct pci_dev *dev)
        pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int i801_suspend(struct device *dev)
 {
        struct i801_priv *priv = dev_get_drvdata(dev);
@@ -1827,9 +1826,8 @@ static int i801_resume(struct device *dev)
 
        return 0;
 }
-#endif
 
-static SIMPLE_DEV_PM_OPS(i801_pm_ops, i801_suspend, i801_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(i801_pm_ops, i801_suspend, i801_resume);
 
 static struct pci_driver i801_driver = {
        .name           = DRV_NAME,
@@ -1838,7 +1836,7 @@ static struct pci_driver i801_driver = {
        .remove         = i801_remove,
        .shutdown       = i801_shutdown,
        .driver         = {
-               .pm     = &i801_pm_ops,
+               .pm     = pm_sleep_ptr(&i801_pm_ops),
                .probe_type = PROBE_PREFER_ASYNCHRONOUS,
        },
 };
index 1ad9d3b..4088203 100644 (file)
 #include <asm/irq.h>
 #include <linux/io.h>
 #include <linux/i2c.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #include "i2c-ibm_iic.h"
 
index 66ba369..f9d4bfe 100644 (file)
@@ -1454,7 +1454,6 @@ static int img_i2c_runtime_resume(struct device *dev)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int img_i2c_suspend(struct device *dev)
 {
        struct img_i2c *i2c = dev_get_drvdata(dev);
@@ -1482,13 +1481,10 @@ static int img_i2c_resume(struct device *dev)
 
        return 0;
 }
-#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops img_i2c_pm = {
-       SET_RUNTIME_PM_OPS(img_i2c_runtime_suspend,
-                          img_i2c_runtime_resume,
-                          NULL)
-       SET_SYSTEM_SLEEP_PM_OPS(img_i2c_suspend, img_i2c_resume)
+       RUNTIME_PM_OPS(img_i2c_runtime_suspend, img_i2c_runtime_resume, NULL)
+       SYSTEM_SLEEP_PM_OPS(img_i2c_suspend, img_i2c_resume)
 };
 
 static const struct of_device_id img_scb_i2c_match[] = {
@@ -1501,7 +1497,7 @@ static struct platform_driver img_scb_i2c_driver = {
        .driver = {
                .name           = "img-i2c-scb",
                .of_match_table = img_scb_i2c_match,
-               .pm             = &img_i2c_pm,
+               .pm             = pm_ptr(&img_i2c_pm),
        },
        .probe = img_i2c_probe,
        .remove_new = img_i2c_remove,
index 150d923..678b30e 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -517,14 +516,12 @@ static irqreturn_t lpi2c_imx_isr(int irq, void *dev_id)
        temp = readl(lpi2c_imx->base + LPI2C_MSR);
        temp &= enabled;
 
-       if (temp & MSR_RDF)
-               lpi2c_imx_read_rxfifo(lpi2c_imx);
-
-       if (temp & MSR_TDF)
-               lpi2c_imx_write_txfifo(lpi2c_imx);
-
        if (temp & MSR_NDF)
                complete(&lpi2c_imx->complete);
+       else if (temp & MSR_RDF)
+               lpi2c_imx_read_rxfifo(lpi2c_imx);
+       else if (temp & MSR_TDF)
+               lpi2c_imx_write_txfifo(lpi2c_imx);
 
        return IRQ_HANDLED;
 }
@@ -572,10 +569,8 @@ static int lpi2c_imx_probe(struct platform_device *pdev)
                sizeof(lpi2c_imx->adapter.name));
 
        ret = devm_clk_bulk_get_all(&pdev->dev, &lpi2c_imx->clks);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "can't get I2C peripheral clock, ret=%d\n", ret);
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(&pdev->dev, ret, "can't get I2C peripheral clock\n");
        lpi2c_imx->num_clks = ret;
 
        ret = of_property_read_u32(pdev->dev.of_node,
@@ -585,10 +580,8 @@ static int lpi2c_imx_probe(struct platform_device *pdev)
 
        ret = devm_request_irq(&pdev->dev, irq, lpi2c_imx_isr, 0,
                               pdev->name, lpi2c_imx);
-       if (ret) {
-               dev_err(&pdev->dev, "can't claim irq %d\n", irq);
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(&pdev->dev, ret, "can't claim irq %d\n", irq);
 
        i2c_set_adapdata(&lpi2c_imx->adapter, lpi2c_imx);
        platform_set_drvdata(pdev, lpi2c_imx);
index 65128a7..1775a79 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/hrtimer.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_data/i2c-imx.h>
@@ -1389,7 +1388,11 @@ static int i2c_imx_init_recovery_info(struct imx_i2c_struct *i2c_imx,
        struct i2c_bus_recovery_info *rinfo = &i2c_imx->rinfo;
 
        i2c_imx->pinctrl = devm_pinctrl_get(&pdev->dev);
-       if (!i2c_imx->pinctrl || IS_ERR(i2c_imx->pinctrl)) {
+       if (!i2c_imx->pinctrl) {
+               dev_info(&pdev->dev, "pinctrl unavailable, bus recovery not supported\n");
+               return 0;
+       }
+       if (IS_ERR(i2c_imx->pinctrl)) {
                dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n");
                return PTR_ERR(i2c_imx->pinctrl);
        }
@@ -1506,8 +1509,7 @@ static int i2c_imx_probe(struct platform_device *pdev)
                goto rpm_disable;
 
        /* Request IRQ */
-       ret = request_threaded_irq(irq, i2c_imx_isr, NULL, IRQF_SHARED,
-                                  pdev->name, i2c_imx);
+       ret = request_irq(irq, i2c_imx_isr, IRQF_SHARED, pdev->name, i2c_imx);
        if (ret) {
                dev_err(&pdev->dev, "can't claim irq %d\n", irq);
                goto rpm_disable;
index 0dfe603..55035cc 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
index 281058e..e01d753 100644 (file)
@@ -350,10 +350,9 @@ static void kempld_i2c_remove(struct platform_device *pdev)
        i2c_del_adapter(&i2c->adap);
 }
 
-#ifdef CONFIG_PM
-static int kempld_i2c_suspend(struct platform_device *pdev, pm_message_t state)
+static int kempld_i2c_suspend(struct device *dev)
 {
-       struct kempld_i2c_data *i2c = platform_get_drvdata(pdev);
+       struct kempld_i2c_data *i2c = dev_get_drvdata(dev);
        struct kempld_device_data *pld = i2c->pld;
        u8 ctrl;
 
@@ -366,9 +365,9 @@ static int kempld_i2c_suspend(struct platform_device *pdev, pm_message_t state)
        return 0;
 }
 
-static int kempld_i2c_resume(struct platform_device *pdev)
+static int kempld_i2c_resume(struct device *dev)
 {
-       struct kempld_i2c_data *i2c = platform_get_drvdata(pdev);
+       struct kempld_i2c_data *i2c = dev_get_drvdata(dev);
        struct kempld_device_data *pld = i2c->pld;
 
        kempld_get_mutex(pld);
@@ -377,19 +376,17 @@ static int kempld_i2c_resume(struct platform_device *pdev)
 
        return 0;
 }
-#else
-#define kempld_i2c_suspend     NULL
-#define kempld_i2c_resume      NULL
-#endif
+
+static DEFINE_SIMPLE_DEV_PM_OPS(kempld_i2c_pm_ops,
+                               kempld_i2c_suspend, kempld_i2c_resume);
 
 static struct platform_driver kempld_i2c_driver = {
        .driver = {
                .name = "kempld-i2c",
+               .pm = pm_sleep_ptr(&kempld_i2c_pm_ops),
        },
        .probe          = kempld_i2c_probe,
        .remove_new     = kempld_i2c_remove,
-       .suspend        = kempld_i2c_suspend,
-       .resume         = kempld_i2c_resume,
 };
 
 module_platform_driver(kempld_i2c_driver);
index 5c6d965..e366033 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
 #include <linux/time.h>
@@ -431,7 +430,6 @@ static void i2c_lpc2k_remove(struct platform_device *dev)
        i2c_del_adapter(&i2c->adap);
 }
 
-#ifdef CONFIG_PM
 static int i2c_lpc2k_suspend(struct device *dev)
 {
        struct lpc2k_i2c *i2c = dev_get_drvdata(dev);
@@ -456,11 +454,6 @@ static const struct dev_pm_ops i2c_lpc2k_dev_pm_ops = {
        .resume_noirq = i2c_lpc2k_resume,
 };
 
-#define I2C_LPC2K_DEV_PM_OPS (&i2c_lpc2k_dev_pm_ops)
-#else
-#define I2C_LPC2K_DEV_PM_OPS NULL
-#endif
-
 static const struct of_device_id lpc2k_i2c_match[] = {
        { .compatible = "nxp,lpc1788-i2c" },
        {},
@@ -472,7 +465,7 @@ static struct platform_driver i2c_lpc2k_driver = {
        .remove_new = i2c_lpc2k_remove,
        .driver = {
                .name           = "lpc2k-i2c",
-               .pm             = I2C_LPC2K_DEV_PM_OPS,
+               .pm             = pm_sleep_ptr(&i2c_lpc2k_dev_pm_ops),
                .of_match_table = lpc2k_i2c_match,
        },
 };
index 16026c8..c7b203c 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/types.h>
 
index 7f58f7e..0b0a1c4 100644 (file)
@@ -378,9 +378,8 @@ static int mchp_corei2c_probe(struct platform_device *pdev)
                return PTR_ERR(idev->base);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq <= 0)
-               return dev_err_probe(&pdev->dev, -ENXIO,
-                                    "invalid IRQ %d for I2C controller\n", irq);
+       if (irq < 0)
+               return irq;
 
        idev->i2c_clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(idev->i2c_clk))
index ae66bdd..b3a7392 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/string.h>
 
@@ -1080,13 +1080,7 @@ static int mlxbf_i2c_init_resource(struct platform_device *pdev,
        if (!tmp_res)
                return -ENOMEM;
 
-       tmp_res->params = platform_get_resource(pdev, IORESOURCE_MEM, type);
-       if (!tmp_res->params) {
-               devm_kfree(dev, tmp_res);
-               return -EIO;
-       }
-
-       tmp_res->io = devm_ioremap_resource(dev, tmp_res->params);
+       tmp_res->io = devm_platform_get_and_ioremap_resource(pdev, type, &tmp_res->params);
        if (IS_ERR(tmp_res->io)) {
                devm_kfree(dev, tmp_res);
                return PTR_ERR(tmp_res->io);
@@ -2323,10 +2317,8 @@ static int mlxbf_i2c_probe(struct platform_device *pdev)
 
                ret = mlxbf_i2c_init_resource(pdev, &priv->smbus,
                                              MLXBF_I2C_SMBUS_RES);
-               if (ret < 0) {
-                       dev_err(dev, "Cannot fetch smbus resource info");
-                       return ret;
-               }
+               if (ret < 0)
+                       return dev_err_probe(dev, ret, "Cannot fetch smbus resource info");
 
                priv->timer->io = priv->smbus->io;
                priv->mst->io = priv->smbus->io + MLXBF_I2C_MST_ADDR_OFFSET;
@@ -2334,39 +2326,29 @@ static int mlxbf_i2c_probe(struct platform_device *pdev)
        } else {
                ret = mlxbf_i2c_init_resource(pdev, &priv->timer,
                                              MLXBF_I2C_SMBUS_TIMER_RES);
-               if (ret < 0) {
-                       dev_err(dev, "Cannot fetch timer resource info");
-                       return ret;
-               }
+               if (ret < 0)
+                       return dev_err_probe(dev, ret, "Cannot fetch timer resource info");
 
                ret = mlxbf_i2c_init_resource(pdev, &priv->mst,
                                              MLXBF_I2C_SMBUS_MST_RES);
-               if (ret < 0) {
-                       dev_err(dev, "Cannot fetch master resource info");
-                       return ret;
-               }
+               if (ret < 0)
+                       return dev_err_probe(dev, ret, "Cannot fetch master resource info");
 
                ret = mlxbf_i2c_init_resource(pdev, &priv->slv,
                                              MLXBF_I2C_SMBUS_SLV_RES);
-               if (ret < 0) {
-                       dev_err(dev, "Cannot fetch slave resource info");
-                       return ret;
-               }
+               if (ret < 0)
+                       return dev_err_probe(dev, ret, "Cannot fetch slave resource info");
        }
 
        ret = mlxbf_i2c_init_resource(pdev, &priv->mst_cause,
                                      MLXBF_I2C_MST_CAUSE_RES);
-       if (ret < 0) {
-               dev_err(dev, "Cannot fetch cause master resource info");
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "Cannot fetch cause master resource info");
 
        ret = mlxbf_i2c_init_resource(pdev, &priv->slv_cause,
                                      MLXBF_I2C_SLV_CAUSE_RES);
-       if (ret < 0) {
-               dev_err(dev, "Cannot fetch cause slave resource info");
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "Cannot fetch cause slave resource info");
 
        adap = &priv->adap;
        adap->owner = THIS_MODULE;
@@ -2397,11 +2379,9 @@ static int mlxbf_i2c_probe(struct platform_device *pdev)
         * does not really hurt, then keep the code as is.
         */
        ret = mlxbf_i2c_init_master(pdev, priv);
-       if (ret < 0) {
-               dev_err(dev, "failed to initialize smbus master %d",
-                       priv->bus);
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "failed to initialize smbus master %d",
+                                    priv->bus);
 
        mlxbf_i2c_init_timings(pdev, priv);
 
@@ -2413,10 +2393,8 @@ static int mlxbf_i2c_probe(struct platform_device *pdev)
        ret = devm_request_irq(dev, irq, mlxbf_i2c_irq,
                               IRQF_SHARED | IRQF_PROBE_SHARED,
                               dev_name(dev), priv);
-       if (ret < 0) {
-               dev_err(dev, "Cannot get irq %d\n", irq);
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "Cannot get irq %d\n", irq);
 
        priv->irq = irq;
 
index c42fd4b..6fec64e 100644 (file)
@@ -22,6 +22,7 @@
 #define MLXCPLD_I2C_BUS_NUM            1
 #define MLXCPLD_I2C_DATA_REG_SZ                36
 #define MLXCPLD_I2C_DATA_SZ_BIT                BIT(5)
+#define MLXCPLD_I2C_DATA_EXT2_SZ_BIT   BIT(6)
 #define MLXCPLD_I2C_DATA_SZ_MASK       GENMASK(6, 5)
 #define MLXCPLD_I2C_SMBUS_BLK_BIT      BIT(7)
 #define MLXCPLD_I2C_MAX_ADDR_LEN       4
@@ -466,6 +467,13 @@ static const struct i2c_adapter_quirks mlxcpld_i2c_quirks_ext = {
        .max_comb_1st_msg_len = 4,
 };
 
+static const struct i2c_adapter_quirks mlxcpld_i2c_quirks_ext2 = {
+       .flags = I2C_AQ_COMB_WRITE_THEN_READ,
+       .max_read_len = (MLXCPLD_I2C_DATA_REG_SZ - 4) * 4,
+       .max_write_len = (MLXCPLD_I2C_DATA_REG_SZ - 4) * 4 + MLXCPLD_I2C_MAX_ADDR_LEN,
+       .max_comb_1st_msg_len = 4,
+};
+
 static struct i2c_adapter mlxcpld_i2c_adapter = {
        .owner          = THIS_MODULE,
        .name           = "i2c-mlxcpld",
@@ -547,6 +555,8 @@ static int mlxcpld_i2c_probe(struct platform_device *pdev)
        /* Check support for extended transaction length */
        if ((val & MLXCPLD_I2C_DATA_SZ_MASK) == MLXCPLD_I2C_DATA_SZ_BIT)
                mlxcpld_i2c_adapter.quirks = &mlxcpld_i2c_quirks_ext;
+       else if ((val & MLXCPLD_I2C_DATA_SZ_MASK) == MLXCPLD_I2C_DATA_EXT2_SZ_BIT)
+               mlxcpld_i2c_adapter.quirks = &mlxcpld_i2c_quirks_ext2;
        /* Check support for smbus block transaction */
        if (val & MLXCPLD_I2C_SMBUS_BLK_BIT)
                priv->smbus_block = true;
index f460a7f..e4e4995 100644 (file)
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched/signal.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/slab.h>
 
index 7ca3f22..1a9b5a0 100644 (file)
@@ -19,9 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/sched.h>
@@ -1514,7 +1512,6 @@ static void mtk_i2c_remove(struct platform_device *pdev)
        clk_bulk_unprepare(I2C_MT65XX_CLK_MAX, i2c->clocks);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int mtk_i2c_suspend_noirq(struct device *dev)
 {
        struct mtk_i2c *i2c = dev_get_drvdata(dev);
@@ -1544,11 +1541,10 @@ static int mtk_i2c_resume_noirq(struct device *dev)
 
        return 0;
 }
-#endif
 
 static const struct dev_pm_ops mtk_i2c_pm = {
-       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_i2c_suspend_noirq,
-                                     mtk_i2c_resume_noirq)
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_i2c_suspend_noirq,
+                                 mtk_i2c_resume_noirq)
 };
 
 static struct platform_driver mtk_i2c_driver = {
@@ -1556,7 +1552,7 @@ static struct platform_driver mtk_i2c_driver = {
        .remove_new = mtk_i2c_remove,
        .driver = {
                .name = I2C_DRV_NAME,
-               .pm = &mtk_i2c_pm,
+               .pm = pm_sleep_ptr(&mtk_i2c_pm),
                .of_match_table = mtk_i2c_of_match,
        },
 };
index 104bb19..81d4616 100644 (file)
@@ -16,7 +16,8 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/reset.h>
 
 #define REG_SM0CFG2_REG                0x28
index 1d76f1c..36def0a 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/io.h>
 #include <linux/stmp_device.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/dma/mxs-dma.h>
index 7772783..38d203d 100644 (file)
@@ -327,8 +327,8 @@ static int nforce2_probe_smb(struct pci_dev *dev, int bar, int alt_reg,
                /* Older incarnations of the device used non-standard BARs */
                u16 iobase;
 
-               if (pci_read_config_word(dev, alt_reg, &iobase)
-                   != PCIBIOS_SUCCESSFUL) {
+               error = pci_read_config_word(dev, alt_reg, &iobase);
+               if (error != PCIBIOS_SUCCESSFUL) {
                        dev_err(&dev->dev, "Error reading PCI config for %s\n",
                                name);
                        return -EIO;
index 212f412..b10574d 100644 (file)
@@ -873,7 +873,6 @@ static irqreturn_t i2c_irq_handler(int irq, void *arg)
        return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int nmk_i2c_suspend_late(struct device *dev)
 {
        int ret;
@@ -890,9 +889,7 @@ static int nmk_i2c_resume_early(struct device *dev)
 {
        return pm_runtime_force_resume(dev);
 }
-#endif
 
-#ifdef CONFIG_PM
 static int nmk_i2c_runtime_suspend(struct device *dev)
 {
        struct amba_device *adev = to_amba_device(dev);
@@ -925,13 +922,10 @@ static int nmk_i2c_runtime_resume(struct device *dev)
 
        return ret;
 }
-#endif
 
 static const struct dev_pm_ops nmk_i2c_pm = {
-       SET_LATE_SYSTEM_SLEEP_PM_OPS(nmk_i2c_suspend_late, nmk_i2c_resume_early)
-       SET_RUNTIME_PM_OPS(nmk_i2c_runtime_suspend,
-                       nmk_i2c_runtime_resume,
-                       NULL)
+       LATE_SYSTEM_SLEEP_PM_OPS(nmk_i2c_suspend_late, nmk_i2c_resume_early)
+       RUNTIME_PM_OPS(nmk_i2c_runtime_suspend, nmk_i2c_runtime_resume, NULL)
 };
 
 static unsigned int nmk_i2c_functionality(struct i2c_adapter *adap)
@@ -1078,7 +1072,7 @@ static struct amba_driver nmk_i2c_driver = {
        .drv = {
                .owner = THIS_MODULE,
                .name = DRIVER_NAME,
-               .pm = &nmk_i2c_pm,
+               .pm = pm_ptr(&nmk_i2c_pm),
        },
        .id_table = nmk_i2c_ids,
        .probe = nmk_i2c_probe,
index 53b65ff..495a8b5 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
index 4ac77e5..041a76f 100644 (file)
@@ -743,7 +743,6 @@ static void ocores_i2c_remove(struct platform_device *pdev)
        i2c_del_adapter(&i2c->adap);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int ocores_i2c_suspend(struct device *dev)
 {
        struct ocores_i2c *i2c = dev_get_drvdata(dev);
@@ -772,11 +771,8 @@ static int ocores_i2c_resume(struct device *dev)
        return ocores_init(dev, i2c);
 }
 
-static SIMPLE_DEV_PM_OPS(ocores_i2c_pm, ocores_i2c_suspend, ocores_i2c_resume);
-#define OCORES_I2C_PM  (&ocores_i2c_pm)
-#else
-#define OCORES_I2C_PM  NULL
-#endif
+static DEFINE_SIMPLE_DEV_PM_OPS(ocores_i2c_pm,
+                               ocores_i2c_suspend, ocores_i2c_resume);
 
 static struct platform_driver ocores_i2c_driver = {
        .probe   = ocores_i2c_probe,
@@ -784,7 +780,7 @@ static struct platform_driver ocores_i2c_driver = {
        .driver  = {
                .name = "ocores-i2c",
                .of_match_table = ocores_i2c_match,
-               .pm = OCORES_I2C_PM,
+               .pm = pm_sleep_ptr(&ocores_i2c_pm),
        },
 };
 
index 5f0ef8c..777f1a0 100644 (file)
@@ -16,7 +16,8 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 /* I2C registers */
 #define OWL_I2C_REG_CTL                0x0000
index d2a9e7b..b8d5480 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/gpio/consumer.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #include <asm/irq.h>
 
index 8240005..a12525b 100644 (file)
@@ -613,7 +613,6 @@ static const struct i2c_algorithm pnx_algorithm = {
        .functionality = i2c_pnx_func,
 };
 
-#ifdef CONFIG_PM_SLEEP
 static int i2c_pnx_controller_suspend(struct device *dev)
 {
        struct i2c_pnx_algo_data *alg_data = dev_get_drvdata(dev);
@@ -630,12 +629,9 @@ static int i2c_pnx_controller_resume(struct device *dev)
        return clk_prepare_enable(alg_data->clk);
 }
 
-static SIMPLE_DEV_PM_OPS(i2c_pnx_pm,
-                        i2c_pnx_controller_suspend, i2c_pnx_controller_resume);
-#define PNX_I2C_PM     (&i2c_pnx_pm)
-#else
-#define PNX_I2C_PM     NULL
-#endif
+static DEFINE_SIMPLE_DEV_PM_OPS(i2c_pnx_pm,
+                               i2c_pnx_controller_suspend,
+                               i2c_pnx_controller_resume);
 
 static int i2c_pnx_probe(struct platform_device *pdev)
 {
@@ -683,8 +679,7 @@ static int i2c_pnx_probe(struct platform_device *pdev)
                 "%s", pdev->name);
 
        /* Register I/O resource */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       alg_data->ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       alg_data->ioaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(alg_data->ioaddr))
                return PTR_ERR(alg_data->ioaddr);
 
@@ -763,7 +758,7 @@ static struct platform_driver i2c_pnx_driver = {
        .driver = {
                .name = "pnx-i2c",
                .of_match_table = of_match_ptr(i2c_pnx_of_match),
-               .pm = PNX_I2C_PM,
+               .pm = pm_sleep_ptr(&i2c_pnx_pm),
        },
        .probe = i2c_pnx_probe,
        .remove_new = i2c_pnx_remove,
index 30e38bc..08b3229 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/platform_device.h>
 #include <linux/platform_data/i2c-pxa.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_address.h>
 
 #define CE4100_PCI_I2C_DEVS    3
index 937f7ee..29be05a 100644 (file)
@@ -1362,7 +1362,7 @@ static int i2c_pxa_probe(struct platform_device *dev)
        struct i2c_pxa_platform_data *plat = dev_get_platdata(&dev->dev);
        enum pxa_i2c_types i2c_type;
        struct pxa_i2c *i2c;
-       struct resource *res = NULL;
+       struct resource *res;
        int ret, irq;
 
        i2c = devm_kzalloc(&dev->dev, sizeof(struct pxa_i2c), GFP_KERNEL);
@@ -1379,8 +1379,7 @@ static int i2c_pxa_probe(struct platform_device *dev)
        i2c->adap.dev.of_node = dev->dev.of_node;
 #endif
 
-       res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-       i2c->reg_base = devm_ioremap_resource(&dev->dev, res);
+       i2c->reg_base = devm_platform_get_and_ioremap_resource(dev, 0, &res);
        if (IS_ERR(i2c->reg_base))
                return PTR_ERR(i2c->reg_base);
 
@@ -1404,10 +1403,9 @@ static int i2c_pxa_probe(struct platform_device *dev)
        strscpy(i2c->adap.name, "pxa_i2c-i2c", sizeof(i2c->adap.name));
 
        i2c->clk = devm_clk_get(&dev->dev, NULL);
-       if (IS_ERR(i2c->clk)) {
-               dev_err(&dev->dev, "failed to get the clk: %ld\n", PTR_ERR(i2c->clk));
-               return PTR_ERR(i2c->clk);
-       }
+       if (IS_ERR(i2c->clk))
+               return dev_err_probe(&dev->dev, PTR_ERR(i2c->clk),
+                                    "failed to get the clk\n");
 
        i2c->reg_ibmr = i2c->reg_base + pxa_reg_layout[i2c_type].ibmr;
        i2c->reg_idbr = i2c->reg_base + pxa_reg_layout[i2c_type].idbr;
@@ -1491,7 +1489,6 @@ static void i2c_pxa_remove(struct platform_device *dev)
        clk_disable_unprepare(i2c->clk);
 }
 
-#ifdef CONFIG_PM
 static int i2c_pxa_suspend_noirq(struct device *dev)
 {
        struct pxa_i2c *i2c = dev_get_drvdata(dev);
@@ -1516,17 +1513,12 @@ static const struct dev_pm_ops i2c_pxa_dev_pm_ops = {
        .resume_noirq = i2c_pxa_resume_noirq,
 };
 
-#define I2C_PXA_DEV_PM_OPS (&i2c_pxa_dev_pm_ops)
-#else
-#define I2C_PXA_DEV_PM_OPS NULL
-#endif
-
 static struct platform_driver i2c_pxa_driver = {
        .probe          = i2c_pxa_probe,
        .remove_new     = i2c_pxa_remove,
        .driver         = {
                .name   = "pxa2xx-i2c",
-               .pm     = I2C_PXA_DEV_PM_OPS,
+               .pm     = pm_sleep_ptr(&i2c_pxa_dev_pm_ops),
                .of_match_table = i2c_pxa_dt_ids,
        },
        .id_table       = i2c_pxa_id_table,
index 622dc14..414882c 100644 (file)
@@ -588,10 +588,10 @@ static int cci_probe(struct platform_device *pdev)
        /* Clocks */
 
        ret = devm_clk_bulk_get_all(dev, &cci->clocks);
-       if (ret < 1) {
-               dev_err(dev, "failed to get clocks %d\n", ret);
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "failed to get clocks\n");
+       else if (!ret)
+               return dev_err_probe(dev, -EINVAL, "not enough clocks in DT\n");
        cci->nclocks = ret;
 
        /* Retrieve CCI clock rate */
index b670a67..229353e 100644 (file)
@@ -767,7 +767,6 @@ err_tx:
 static int geni_i2c_probe(struct platform_device *pdev)
 {
        struct geni_i2c_dev *gi2c;
-       struct resource *res;
        u32 proto, tx_depth, fifo_disable;
        int ret;
        struct device *dev = &pdev->dev;
@@ -779,8 +778,7 @@ static int geni_i2c_probe(struct platform_device *pdev)
 
        gi2c->se.dev = dev;
        gi2c->se.wrapper = dev_get_drvdata(dev->parent);
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       gi2c->se.base = devm_ioremap_resource(dev, res);
+       gi2c->se.base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(gi2c->se.base))
                return PTR_ERR(gi2c->se.base);
 
index ae90170..598102d 100644 (file)
@@ -1927,7 +1927,6 @@ static void qup_i2c_remove(struct platform_device *pdev)
        pm_runtime_set_suspended(qup->dev);
 }
 
-#ifdef CONFIG_PM
 static int qup_i2c_pm_suspend_runtime(struct device *device)
 {
        struct qup_i2c_dev *qup = dev_get_drvdata(device);
@@ -1945,9 +1944,7 @@ static int qup_i2c_pm_resume_runtime(struct device *device)
        qup_i2c_enable_clocks(qup);
        return 0;
 }
-#endif
 
-#ifdef CONFIG_PM_SLEEP
 static int qup_i2c_suspend(struct device *device)
 {
        if (!pm_runtime_suspended(device))
@@ -1962,16 +1959,11 @@ static int qup_i2c_resume(struct device *device)
        pm_request_autosuspend(device);
        return 0;
 }
-#endif
 
 static const struct dev_pm_ops qup_i2c_qup_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(
-               qup_i2c_suspend,
-               qup_i2c_resume)
-       SET_RUNTIME_PM_OPS(
-               qup_i2c_pm_suspend_runtime,
-               qup_i2c_pm_resume_runtime,
-               NULL)
+       SYSTEM_SLEEP_PM_OPS(qup_i2c_suspend, qup_i2c_resume)
+       RUNTIME_PM_OPS(qup_i2c_pm_suspend_runtime,
+                      qup_i2c_pm_resume_runtime, NULL)
 };
 
 static const struct of_device_id qup_i2c_dt_match[] = {
@@ -1987,7 +1979,7 @@ static struct platform_driver qup_i2c_driver = {
        .remove_new = qup_i2c_remove,
        .driver = {
                .name = "i2c_qup",
-               .pm = &qup_i2c_qup_pm_ops,
+               .pm = pm_ptr(&qup_i2c_qup_pm_ops),
                .of_match_table = qup_i2c_dt_match,
                .acpi_match_table = ACPI_PTR(qup_i2c_acpi_match),
        },
index 2d9c374..a32a93f 100644 (file)
@@ -24,7 +24,7 @@
 #include <linux/i2c-smbus.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
@@ -1169,7 +1169,6 @@ static void rcar_i2c_remove(struct platform_device *pdev)
        pm_runtime_disable(dev);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int rcar_i2c_suspend(struct device *dev)
 {
        struct rcar_i2c_priv *priv = dev_get_drvdata(dev);
@@ -1187,19 +1186,14 @@ static int rcar_i2c_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops rcar_i2c_pm_ops = {
-       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(rcar_i2c_suspend, rcar_i2c_resume)
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(rcar_i2c_suspend, rcar_i2c_resume)
 };
 
-#define DEV_PM_OPS (&rcar_i2c_pm_ops)
-#else
-#define DEV_PM_OPS NULL
-#endif /* CONFIG_PM_SLEEP */
-
 static struct platform_driver rcar_i2c_driver = {
        .driver = {
                .name   = "i2c-rcar",
                .of_match_table = rcar_i2c_dt_ids,
-               .pm     = DEV_PM_OPS,
+               .pm     = pm_sleep_ptr(&rcar_i2c_pm_ops),
        },
        .probe          = rcar_i2c_probe,
        .remove_new     = rcar_i2c_remove,
index 5f8c0bd..f0ee887 100644 (file)
@@ -42,7 +42,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
index 28f0e5c..127eb38 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/gpio/consumer.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/mfd/syscon.h>
@@ -1034,9 +1033,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
        dev_dbg(&pdev->dev, "clock source %p\n", i2c->clk);
 
        /* map the registers */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       i2c->regs = devm_ioremap_resource(&pdev->dev, res);
-
+       i2c->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(i2c->regs))
                return PTR_ERR(i2c->regs);
 
@@ -1076,7 +1073,6 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev)
        if (!(i2c->quirks & QUIRK_POLL)) {
                i2c->irq = ret = platform_get_irq(pdev, 0);
                if (ret < 0) {
-                       dev_err(&pdev->dev, "cannot find IRQ\n");
                        clk_unprepare(i2c->clk);
                        return ret;
                }
@@ -1125,7 +1121,6 @@ static void s3c24xx_i2c_remove(struct platform_device *pdev)
        i2c_del_adapter(&i2c->adap);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int s3c24xx_i2c_suspend_noirq(struct device *dev)
 {
        struct s3c24xx_i2c *i2c = dev_get_drvdata(dev);
@@ -1155,26 +1150,19 @@ static int s3c24xx_i2c_resume_noirq(struct device *dev)
 
        return 0;
 }
-#endif
 
-#ifdef CONFIG_PM
 static const struct dev_pm_ops s3c24xx_i2c_dev_pm_ops = {
-       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(s3c24xx_i2c_suspend_noirq,
-                                     s3c24xx_i2c_resume_noirq)
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(s3c24xx_i2c_suspend_noirq,
+                                 s3c24xx_i2c_resume_noirq)
 };
 
-#define S3C24XX_DEV_PM_OPS (&s3c24xx_i2c_dev_pm_ops)
-#else
-#define S3C24XX_DEV_PM_OPS NULL
-#endif
-
 static struct platform_driver s3c24xx_i2c_driver = {
        .probe          = s3c24xx_i2c_probe,
        .remove_new     = s3c24xx_i2c_remove,
        .id_table       = s3c24xx_driver_ids,
        .driver         = {
                .name   = "s3c-i2c",
-               .pm     = S3C24XX_DEV_PM_OPS,
+               .pm     = pm_sleep_ptr(&s3c24xx_i2c_dev_pm_ops),
                .of_match_table = of_match_ptr(s3c24xx_i2c_match),
        },
 };
index 21717b9..5adbe62 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
@@ -871,7 +871,6 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
 {
        struct sh_mobile_i2c_data *pd;
        struct i2c_adapter *adap;
-       struct resource *res;
        const struct sh_mobile_dt_config *config;
        int ret;
        u32 bus_speed;
@@ -893,10 +892,7 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
        pd->dev = &dev->dev;
        platform_set_drvdata(dev, pd);
 
-       res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-
-       pd->res = res;
-       pd->reg = devm_ioremap_resource(&dev->dev, res);
+       pd->reg = devm_platform_get_and_ioremap_resource(dev, 0, &pd->res);
        if (IS_ERR(pd->reg))
                return PTR_ERR(pd->reg);
 
@@ -905,7 +901,7 @@ static int sh_mobile_i2c_probe(struct platform_device *dev)
        pd->clks_per_count = 1;
 
        /* Newer variants come with two new bits in ICIC */
-       if (resource_size(res) > 0x17)
+       if (resource_size(pd->res) > 0x17)
                pd->flags |= IIC_FLAG_HAS_ICIC67;
 
        pm_runtime_enable(&dev->dev);
@@ -965,7 +961,6 @@ static void sh_mobile_i2c_remove(struct platform_device *dev)
        pm_runtime_disable(&dev->dev);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int sh_mobile_i2c_suspend(struct device *dev)
 {
        struct sh_mobile_i2c_data *pd = dev_get_drvdata(dev);
@@ -983,20 +978,15 @@ static int sh_mobile_i2c_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops sh_mobile_i2c_pm_ops = {
-       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(sh_mobile_i2c_suspend,
-                                     sh_mobile_i2c_resume)
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(sh_mobile_i2c_suspend,
+                                 sh_mobile_i2c_resume)
 };
 
-#define DEV_PM_OPS (&sh_mobile_i2c_pm_ops)
-#else
-#define DEV_PM_OPS NULL
-#endif /* CONFIG_PM_SLEEP */
-
 static struct platform_driver sh_mobile_i2c_driver = {
        .driver         = {
                .name           = "i2c-sh_mobile",
                .of_match_table = sh_mobile_i2c_dt_ids,
-               .pm     = DEV_PM_OPS,
+               .pm     = pm_sleep_ptr(&sh_mobile_i2c_pm_ops),
        },
        .probe          = sh_mobile_i2c_probe,
        .remove_new     = sh_mobile_i2c_remove,
index c793a5c..486f1e9 100644 (file)
@@ -175,11 +175,11 @@ static int sis5595_setup(struct pci_dev *SIS5595_dev)
 
        if (force_addr) {
                dev_info(&SIS5595_dev->dev, "forcing ISA address 0x%04X\n", sis5595_base);
-               if (pci_write_config_word(SIS5595_dev, ACPI_BASE, sis5595_base)
-                   != PCIBIOS_SUCCESSFUL)
+               retval = pci_write_config_word(SIS5595_dev, ACPI_BASE, sis5595_base);
+               if (retval != PCIBIOS_SUCCESSFUL)
                        goto error;
-               if (pci_read_config_word(SIS5595_dev, ACPI_BASE, &a)
-                   != PCIBIOS_SUCCESSFUL)
+               retval = pci_read_config_word(SIS5595_dev, ACPI_BASE, &a);
+               if (retval != PCIBIOS_SUCCESSFUL)
                        goto error;
                if ((a & ~(SIS5595_EXTENT - 1)) != sis5595_base) {
                        /* doesn't work for some chips! */
@@ -188,16 +188,16 @@ static int sis5595_setup(struct pci_dev *SIS5595_dev)
                }
        }
 
-       if (pci_read_config_byte(SIS5595_dev, SIS5595_ENABLE_REG, &val)
-           != PCIBIOS_SUCCESSFUL)
+       retval = pci_read_config_byte(SIS5595_dev, SIS5595_ENABLE_REG, &val);
+       if (retval != PCIBIOS_SUCCESSFUL)
                goto error;
        if ((val & 0x80) == 0) {
                dev_info(&SIS5595_dev->dev, "enabling ACPI\n");
-               if (pci_write_config_byte(SIS5595_dev, SIS5595_ENABLE_REG, val | 0x80)
-                   != PCIBIOS_SUCCESSFUL)
+               retval = pci_write_config_byte(SIS5595_dev, SIS5595_ENABLE_REG, val | 0x80);
+               if (retval != PCIBIOS_SUCCESSFUL)
                        goto error;
-               if (pci_read_config_byte(SIS5595_dev, SIS5595_ENABLE_REG, &val)
-                   != PCIBIOS_SUCCESSFUL)
+               retval = pci_read_config_byte(SIS5595_dev, SIS5595_ENABLE_REG, &val);
+               if (retval != PCIBIOS_SUCCESSFUL)
                        goto error;
                if ((val & 0x80) == 0) {
                        /* doesn't work for some chips? */
index ffc54fb..c52d1be 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
index 25c3521..ce23334 100644 (file)
@@ -812,8 +812,7 @@ static int st_i2c_probe(struct platform_device *pdev)
        if (!i2c_dev)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       i2c_dev->base = devm_ioremap_resource(&pdev->dev, res);
+       i2c_dev->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(i2c_dev->base))
                return PTR_ERR(i2c_dev->base);
 
index 6ad06a5..ecc5479 100644 (file)
@@ -767,8 +767,7 @@ static int stm32f4_i2c_probe(struct platform_device *pdev)
        if (!i2c_dev)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       i2c_dev->base = devm_ioremap_resource(&pdev->dev, res);
+       i2c_dev->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(i2c_dev->base))
                return PTR_ERR(i2c_dev->base);
 
index e897d91..579b305 100644 (file)
@@ -2121,12 +2121,12 @@ static int stm32f7_i2c_probe(struct platform_device *pdev)
        phy_addr = (dma_addr_t)res->start;
 
        irq_event = platform_get_irq(pdev, 0);
-       if (irq_event <= 0)
-               return irq_event ? : -ENOENT;
+       if (irq_event < 0)
+               return irq_event;
 
        irq_error = platform_get_irq(pdev, 1);
-       if (irq_error <= 0)
-               return irq_error ? : -ENOENT;
+       if (irq_error < 0)
+               return irq_error;
 
        i2c_dev->wakeup_src = of_property_read_bool(pdev->dev.of_node,
                                                    "wakeup-source");
index 4cc196c..bbea521 100644 (file)
@@ -557,20 +557,16 @@ static int synquacer_i2c_probe(struct platform_device *pdev)
                dev_dbg(&pdev->dev, "clock source %p\n", i2c->pclk);
 
                ret = clk_prepare_enable(i2c->pclk);
-               if (ret) {
-                       dev_err(&pdev->dev, "failed to enable clock (%d)\n",
-                               ret);
-                       return ret;
-               }
+               if (ret)
+                       return dev_err_probe(&pdev->dev, ret, "failed to enable clock\n");
                i2c->pclkrate = clk_get_rate(i2c->pclk);
        }
 
        if (i2c->pclkrate < SYNQUACER_I2C_MIN_CLK_RATE ||
-           i2c->pclkrate > SYNQUACER_I2C_MAX_CLK_RATE) {
-               dev_err(&pdev->dev, "PCLK missing or out of range (%d)\n",
-                       i2c->pclkrate);
-               return -EINVAL;
-       }
+           i2c->pclkrate > SYNQUACER_I2C_MAX_CLK_RATE)
+               return dev_err_probe(&pdev->dev, -EINVAL,
+                                    "PCLK missing or out of range (%d)\n",
+                                    i2c->pclkrate);
 
        i2c->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(i2c->base))
@@ -582,10 +578,8 @@ static int synquacer_i2c_probe(struct platform_device *pdev)
 
        ret = devm_request_irq(&pdev->dev, i2c->irq, synquacer_i2c_isr,
                               0, dev_name(&pdev->dev), i2c);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "cannot claim IRQ %d\n", i2c->irq);
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(&pdev->dev, ret, "cannot claim IRQ %d\n", i2c->irq);
 
        i2c->state = STATE_IDLE;
        i2c->dev = &pdev->dev;
@@ -605,10 +599,8 @@ static int synquacer_i2c_probe(struct platform_device *pdev)
        synquacer_i2c_hw_init(i2c);
 
        ret = i2c_add_numbered_adapter(&i2c->adapter);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to add bus to i2c core\n");
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(&pdev->dev, ret, "failed to add bus to i2c core\n");
 
        platform_set_drvdata(pdev, i2c);
 
index bc3f945..b0840fa 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
index 03fc10b..920d5a8 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/ktime.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
index d1fa9ff..1bffe36 100644 (file)
@@ -222,6 +222,10 @@ static int i2c_tiny_usb_probe(struct usb_interface *interface,
        int retval = -ENOMEM;
        u16 version;
 
+       if (interface->intf_assoc &&
+           interface->intf_assoc->bFunctionClass != USB_CLASS_VENDOR_SPEC)
+               return -ENODEV;
+
        dev_dbg(&interface->dev, "probing usb device\n");
 
        /* allocate memory for our device state and initialize it */
index 4b9536f..c60ae53 100644 (file)
@@ -243,7 +243,6 @@ static struct virtio_device_id id_table[] = {
 };
 MODULE_DEVICE_TABLE(virtio, id_table);
 
-#ifdef CONFIG_PM_SLEEP
 static int virtio_i2c_freeze(struct virtio_device *vdev)
 {
        virtio_i2c_del_vqs(vdev);
@@ -254,7 +253,6 @@ static int virtio_i2c_restore(struct virtio_device *vdev)
 {
        return virtio_i2c_setup_vqs(vdev->priv);
 }
-#endif
 
 static const unsigned int features[] = {
        VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
@@ -269,10 +267,8 @@ static struct virtio_driver virtio_i2c_driver = {
        .driver                 = {
                .name   = "i2c_virtio",
        },
-#ifdef CONFIG_PM_SLEEP
-       .freeze = virtio_i2c_freeze,
-       .restore = virtio_i2c_restore,
-#endif
+       .freeze                 = pm_sleep_ptr(virtio_i2c_freeze),
+       .restore                = pm_sleep_ptr(virtio_i2c_restore),
 };
 module_virtio_driver(virtio_i2c_driver);
 
index f59e8c5..08a59a9 100644 (file)
@@ -529,10 +529,8 @@ static int xlp9xx_i2c_probe(struct platform_device *pdev)
 
        err = devm_request_irq(&pdev->dev, priv->irq, xlp9xx_i2c_isr, 0,
                               pdev->name, priv);
-       if (err) {
-               dev_err(&pdev->dev, "IRQ request failed!\n");
-               return err;
-       }
+       if (err)
+               return dev_err_probe(&pdev->dev, err, "IRQ request failed!\n");
 
        init_completion(&priv->msg_complete);
        priv->adapter.dev.parent = &pdev->dev;
index ea838db..db1b905 100644 (file)
@@ -65,11 +65,11 @@ config I2C_MUX_PCA9541
          will be called i2c-mux-pca9541.
 
 config I2C_MUX_PCA954x
-       tristate "NXP PCA954x and PCA984x I2C Mux/switches"
+       tristate "NXP PCA954x/PCA984x and Maxim MAX735x/MAX736x I2C Mux/switches"
        depends on GPIOLIB || COMPILE_TEST
        help
-         If you say yes here you get support for the NXP PCA954x
-         and PCA984x I2C mux/switch devices.
+         If you say yes here you get support for NXP PCA954x/PCA984x
+         and Maxim MAX735x/MAX736x I2C mux/switch devices.
 
          This driver can also be built as a module.  If so, the module
          will be called i2c-mux-pca954x.
index 0405af0..baccf4b 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/i2c-mux.h>
 #include <linux/module.h>
 #include <linux/mux/consumer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 
 struct mux {
index 5a03031..23766d8 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/i2c.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
@@ -62,7 +61,7 @@ static const struct chip_desc chips[] = {
 
 static bool ltc4306_is_volatile_reg(struct device *dev, unsigned int reg)
 {
-       return (reg == LTC_REG_CONFIG) ? true : false;
+       return reg == LTC_REG_CONFIG;
 }
 
 static const struct regmap_config ltc4306_regmap_config = {
index 0ccee2a..2219062 100644 (file)
  *      PCA9540, PCA9542, PCA9543, PCA9544, PCA9545, PCA9546, PCA9547,
  *      PCA9548, PCA9846, PCA9847, PCA9848 and PCA9849.
  *
+ * It's also compatible with Maxim's MAX735x I2C switch chips, which are controlled
+ * like the NXP PCA9548, and the MAX736x chips, which act like the PCA9544.
+ *
+ * This includes the:
+ *      MAX7356, MAX7357, MAX7358, MAX7367, MAX7368 and MAX7369
+ *
  * These chips are all controlled via the I2C bus itself, and all have a
  * single 8-bit register. The upstream "parent" bus fans out to two,
  * four, or eight downstream busses or channels; which of these
@@ -42,6 +48,7 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/property.h>
+#include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <dt-bindings/mux/mux.h>
 #define PCA954X_IRQ_OFFSET 4
 
 enum pca_type {
+       max_7356,
+       max_7357,
+       max_7358,
+       max_7367,
+       max_7368,
+       max_7369,
        pca_9540,
        pca_9542,
        pca_9543,
@@ -88,10 +101,52 @@ struct pca954x {
        struct irq_domain *irq;
        unsigned int irq_mask;
        raw_spinlock_t lock;
+       struct regulator *supply;
 };
 
-/* Provide specs for the PCA954x types we know about */
+/* Provide specs for the MAX735x, PCA954x and PCA984x types we know about */
 static const struct chip_desc chips[] = {
+       [max_7356] = {
+               .nchans = 8,
+               .muxtype = pca954x_isswi,
+               .id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
+       },
+       [max_7357] = {
+               .nchans = 8,
+               .muxtype = pca954x_isswi,
+               .id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
+               /*
+                * No interrupt controller support. The interrupt
+                * provides information about stuck channels.
+                */
+       },
+       [max_7358] = {
+               .nchans = 8,
+               .muxtype = pca954x_isswi,
+               .id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
+               /*
+                * No interrupt controller support. The interrupt
+                * provides information about stuck channels.
+                */
+       },
+       [max_7367] = {
+               .nchans = 4,
+               .muxtype = pca954x_isswi,
+               .has_irq = 1,
+               .id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
+       },
+       [max_7368] = {
+               .nchans = 4,
+               .muxtype = pca954x_isswi,
+               .id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
+       },
+       [max_7369] = {
+               .nchans = 4,
+               .enable = 0x4,
+               .muxtype = pca954x_ismux,
+               .has_irq = 1,
+               .id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
+       },
        [pca_9540] = {
                .nchans = 2,
                .enable = 0x4,
@@ -177,6 +232,12 @@ static const struct chip_desc chips[] = {
 };
 
 static const struct i2c_device_id pca954x_id[] = {
+       { "max7356", max_7356 },
+       { "max7357", max_7357 },
+       { "max7358", max_7358 },
+       { "max7367", max_7367 },
+       { "max7368", max_7368 },
+       { "max7369", max_7369 },
        { "pca9540", pca_9540 },
        { "pca9542", pca_9542 },
        { "pca9543", pca_9543 },
@@ -194,6 +255,12 @@ static const struct i2c_device_id pca954x_id[] = {
 MODULE_DEVICE_TABLE(i2c, pca954x_id);
 
 static const struct of_device_id pca954x_of_match[] = {
+       { .compatible = "maxim,max7356", .data = &chips[max_7356] },
+       { .compatible = "maxim,max7357", .data = &chips[max_7357] },
+       { .compatible = "maxim,max7358", .data = &chips[max_7358] },
+       { .compatible = "maxim,max7367", .data = &chips[max_7367] },
+       { .compatible = "maxim,max7368", .data = &chips[max_7368] },
+       { .compatible = "maxim,max7369", .data = &chips[max_7369] },
        { .compatible = "nxp,pca9540", .data = &chips[pca_9540] },
        { .compatible = "nxp,pca9542", .data = &chips[pca_9542] },
        { .compatible = "nxp,pca9543", .data = &chips[pca_9543] },
@@ -382,6 +449,8 @@ static void pca954x_cleanup(struct i2c_mux_core *muxc)
        struct pca954x *data = i2c_mux_priv(muxc);
        int c, irq;
 
+       regulator_disable(data->supply);
+
        if (data->irq) {
                for (c = 0; c < data->chip->nchans; c++) {
                        irq = irq_find_mapping(data->irq, c);
@@ -434,10 +503,22 @@ static int pca954x_probe(struct i2c_client *client)
        i2c_set_clientdata(client, muxc);
        data->client = client;
 
+       data->supply = devm_regulator_get(dev, "vdd");
+       if (IS_ERR(data->supply))
+               return dev_err_probe(dev, PTR_ERR(data->supply),
+                               "Failed to request regulator\n");
+
+       ret = regulator_enable(data->supply);
+       if (ret)
+               return dev_err_probe(dev, ret,
+                                    "Failed to enable vdd supply\n");
+
        /* Reset the mux if a reset GPIO is specified. */
        gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(gpio))
-               return PTR_ERR(gpio);
+       if (IS_ERR(gpio)) {
+               ret = PTR_ERR(gpio);
+               goto fail_cleanup;
+       }
        if (gpio) {
                udelay(1);
                gpiod_set_value_cansleep(gpio, 0);
@@ -454,7 +535,7 @@ static int pca954x_probe(struct i2c_client *client)
 
                ret = i2c_get_device_id(client, &id);
                if (ret && ret != -EOPNOTSUPP)
-                       return ret;
+                       goto fail_cleanup;
 
                if (!ret &&
                    (id.manufacturer_id != data->chip->id.manufacturer_id ||
@@ -462,7 +543,8 @@ static int pca954x_probe(struct i2c_client *client)
                        dev_warn(dev, "unexpected device id %03x-%03x-%x\n",
                                 id.manufacturer_id, id.part_id,
                                 id.die_revision);
-                       return -ENODEV;
+                       ret = -ENODEV;
+                       goto fail_cleanup;
                }
        }
 
@@ -481,7 +563,8 @@ static int pca954x_probe(struct i2c_client *client)
        ret = pca954x_init(client, data);
        if (ret < 0) {
                dev_warn(dev, "probe failed\n");
-               return -ENODEV;
+               ret = -ENODEV;
+               goto fail_cleanup;
        }
 
        ret = pca954x_irq_setup(muxc);
@@ -530,7 +613,6 @@ static void pca954x_remove(struct i2c_client *client)
        pca954x_cleanup(muxc);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int pca954x_resume(struct device *dev)
 {
        struct i2c_client *client = to_i2c_client(dev);
@@ -544,14 +626,13 @@ static int pca954x_resume(struct device *dev)
 
        return ret;
 }
-#endif
 
-static SIMPLE_DEV_PM_OPS(pca954x_pm, NULL, pca954x_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(pca954x_pm, NULL, pca954x_resume);
 
 static struct i2c_driver pca954x_driver = {
        .driver         = {
                .name   = "pca954x",
-               .pm     = &pca954x_pm,
+               .pm     = pm_sleep_ptr(&pca954x_pm),
                .of_match_table = pca954x_of_match,
        },
        .probe          = pca954x_probe,
index 08aeb69..87283e4 100644 (file)
@@ -1308,7 +1308,11 @@ static int i3c_master_get_i3c_addrs(struct i3c_dev_desc *dev)
        if (dev->info.static_addr) {
                status = i3c_bus_get_addr_slot_status(&master->bus,
                                                      dev->info.static_addr);
-               if (status != I3C_ADDR_SLOT_FREE)
+               /* Since static address and assigned dynamic address can be
+                * equal, allow this case to pass.
+                */
+               if (status != I3C_ADDR_SLOT_FREE &&
+                   dev->info.static_addr != dev->boardinfo->init_dyn_addr)
                        return -EBUSY;
 
                i3c_bus_set_addr_slot_status(&master->bus,
index 09ed19d..01a47d3 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
index 01610fa..49551db 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
-#include <linux/of_device.h>
 
 #define DEV_ID                         0x0
 #define DEV_ID_I3C_MASTER              0x5034
index d97c317..6a781f8 100644 (file)
@@ -339,7 +339,7 @@ static int hci_cmd_v1_daa(struct i3c_hci *hci)
                        break;
                }
                if (RESP_STATUS(xfer[0].response) == RESP_ERR_NACK &&
-                   RESP_STATUS(xfer[0].response) == 1) {
+                   RESP_DATA_LENGTH(xfer->response) == 1) {
                        ret = 0;  /* no more devices to be assigned */
                        break;
                }
index 0d63b73..8f8295a 100644 (file)
@@ -156,6 +156,7 @@ struct svc_i3c_regs_save {
  * @base: I3C master controller
  * @dev: Corresponding device
  * @regs: Memory mapping
+ * @saved_regs: Volatile values for PM operations
  * @free_slots: Bit array of available slots
  * @addrs: Array containing the dynamic addresses of each attached device
  * @descs: Array of descriptors, one per attached device
@@ -789,6 +790,10 @@ static int svc_i3c_master_do_daa_locked(struct svc_i3c_master *master,
                                 */
                                break;
                        } else if (SVC_I3C_MSTATUS_NACKED(reg)) {
+                               /* No I3C devices attached */
+                               if (dev_nb == 0)
+                                       break;
+
                                /*
                                 * A slave device nacked the address, this is
                                 * allowed only once, DAA will be stopped and
@@ -1263,11 +1268,17 @@ static int svc_i3c_master_send_ccc_cmd(struct i3c_master_controller *m,
 {
        struct svc_i3c_master *master = to_svc_i3c_master(m);
        bool broadcast = cmd->id < 0x80;
+       int ret;
 
        if (broadcast)
-               return svc_i3c_master_send_bdcast_ccc_cmd(master, cmd);
+               ret = svc_i3c_master_send_bdcast_ccc_cmd(master, cmd);
        else
-               return svc_i3c_master_send_direct_ccc_cmd(master, cmd);
+               ret = svc_i3c_master_send_direct_ccc_cmd(master, cmd);
+
+       if (ret)
+               cmd->err = I3C_ERROR_M2;
+
+       return ret;
 }
 
 static int svc_i3c_master_priv_xfers(struct i3c_dev_desc *dev,
@@ -1518,8 +1529,8 @@ static int svc_i3c_master_probe(struct platform_device *pdev)
                return PTR_ERR(master->sclk);
 
        master->irq = platform_get_irq(pdev, 0);
-       if (master->irq <= 0)
-               return -ENOENT;
+       if (master->irq < 0)
+               return master->irq;
 
        master->dev = dev;
 
index 5a2c2fb..fe73b26 100644 (file)
@@ -25,6 +25,7 @@ if GAMEPORT
 
 config GAMEPORT_NS558
        tristate "Classic ISA and PnP gameport support"
+       depends on ISA
        help
          Say Y here if you have an ISA or PnP gameport.
 
@@ -35,6 +36,7 @@ config GAMEPORT_NS558
 
 config GAMEPORT_L4
        tristate "PDPI Lightning 4 gamecard support"
+       depends on ISA
        help
          Say Y here if you have a PDPI Lightning 4 gamecard.
 
@@ -53,7 +55,7 @@ config GAMEPORT_EMU10K1
 
 config GAMEPORT_FM801
        tristate "ForteMedia FM801 gameport support"
-       depends on PCI
+       depends on PCI && HAS_IOPORT
        help
          Say Y here if you have ForteMedia FM801 PCI audio controller
          (Abit AU10, Genius Sound Maker, HP Workstation zx2000,
index a144332..34f416a 100644 (file)
@@ -519,12 +519,32 @@ EXPORT_SYMBOL(gameport_set_phys);
 
 static void gameport_default_trigger(struct gameport *gameport)
 {
+#ifdef CONFIG_HAS_IOPORT
        outb(0xff, gameport->io);
+#endif
 }
 
 static unsigned char gameport_default_read(struct gameport *gameport)
 {
+#ifdef CONFIG_HAS_IOPORT
        return inb(gameport->io);
+#else
+       return 0xff;
+#endif
+}
+
+static void gameport_setup_default_handlers(struct gameport *gameport)
+{
+       if ((!gameport->trigger || !gameport->read) &&
+           !IS_ENABLED(CONFIG_HAS_IOPORT))
+               dev_err(&gameport->dev,
+                       "I/O port access is required for %s (%s) but is not available\n",
+                       gameport->phys, gameport->name);
+
+       if (!gameport->trigger)
+               gameport->trigger = gameport_default_trigger;
+       if (!gameport->read)
+               gameport->read = gameport_default_read;
 }
 
 /*
@@ -545,11 +565,7 @@ static void gameport_init_port(struct gameport *gameport)
        if (gameport->parent)
                gameport->dev.parent = &gameport->parent->dev;
 
-       if (!gameport->trigger)
-               gameport->trigger = gameport_default_trigger;
-       if (!gameport->read)
-               gameport->read = gameport_default_read;
-
+       gameport_setup_default_handlers(gameport);
        INIT_LIST_HEAD(&gameport->node);
        spin_lock_init(&gameport->timer_lock);
        timer_setup(&gameport->poll_timer, gameport_run_poll_handler, 0);
index cdb1933..ede3805 100644 (file)
@@ -264,6 +264,7 @@ static const struct xpad_device {
        { 0x0f0d, 0x0067, "HORIPAD ONE", 0, XTYPE_XBOXONE },
        { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
        { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
+       { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX },
        { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX },
        { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
@@ -365,6 +366,7 @@ static const struct xpad_device {
        { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 },
        { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 },
        { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 },
+       { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 },
        { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX },
        { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
        { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN }
@@ -499,6 +501,8 @@ static const struct usb_device_id xpad_table[] = {
        XPAD_XBOX360_VENDOR(0x2f24),            /* GameSir controllers */
        XPAD_XBOX360_VENDOR(0x31e3),            /* Wooting Keyboards */
        XPAD_XBOX360_VENDOR(0x3285),            /* Nacon GC-100 */
+       XPAD_XBOX360_VENDOR(0x3537),            /* GameSir Controllers */
+       XPAD_XBOXONE_VENDOR(0x3537),            /* GameSir Controllers */
        { }
 };
 
@@ -1720,6 +1724,27 @@ static int xpad_start_input(struct usb_xpad *xpad)
                        return error;
                }
        }
+       if (xpad->xtype == XTYPE_XBOX360) {
+               /*
+                * Some third-party Xbox 360-style controllers
+                * require this message to finish initialization.
+                */
+               u8 dummy[20];
+
+               error = usb_control_msg_recv(xpad->udev, 0,
+                                            /* bRequest */ 0x01,
+                                            /* bmRequestType */
+                                            USB_TYPE_VENDOR | USB_DIR_IN |
+                                               USB_RECIP_INTERFACE,
+                                            /* wValue */ 0x100,
+                                            /* wIndex */ 0x00,
+                                            dummy, sizeof(dummy),
+                                            25, GFP_KERNEL);
+               if (error)
+                       dev_warn(&xpad->dev->dev,
+                                "unable to receive magic message: %d\n",
+                                error);
+       }
 
        return 0;
 }
index 896a5a9..61e8e43 100644 (file)
@@ -713,17 +713,11 @@ static int adp5588_fw_parse(struct adp5588_kpad *kpad)
        return 0;
 }
 
-static void adp5588_disable_regulator(void *reg)
-{
-       regulator_disable(reg);
-}
-
 static int adp5588_probe(struct i2c_client *client)
 {
        struct adp5588_kpad *kpad;
        struct input_dev *input;
        struct gpio_desc *gpio;
-       struct regulator *vcc;
        unsigned int revid;
        int ret;
        int error;
@@ -749,16 +743,7 @@ static int adp5588_probe(struct i2c_client *client)
        if (error)
                return error;
 
-       vcc = devm_regulator_get(&client->dev, "vcc");
-       if (IS_ERR(vcc))
-               return PTR_ERR(vcc);
-
-       error = regulator_enable(vcc);
-       if (error)
-               return error;
-
-       error = devm_add_action_or_reset(&client->dev,
-                                        adp5588_disable_regulator, vcc);
+       error = devm_regulator_get_enable(&client->dev, "vcc");
        if (error)
                return error;
 
index a20a4e1..e305c44 100644 (file)
@@ -196,7 +196,7 @@ static int __init amikbd_probe(struct platform_device *pdev)
        struct input_dev *dev;
        int i, err;
 
-       dev = input_allocate_device();
+       dev = devm_input_allocate_device(&pdev->dev);
        if (!dev) {
                dev_err(&pdev->dev, "Not enough memory for input device\n");
                return -ENOMEM;
@@ -208,7 +208,6 @@ static int __init amikbd_probe(struct platform_device *pdev)
        dev->id.vendor = 0x0001;
        dev->id.product = 0x0001;
        dev->id.version = 0x0100;
-       dev->dev.parent = &pdev->dev;
 
        dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 
@@ -218,35 +217,21 @@ static int __init amikbd_probe(struct platform_device *pdev)
        amikbd_init_console_keymaps();
 
        ciaa.cra &= ~0x41;       /* serial data in, turn off TA */
-       err = request_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt, 0, "amikbd",
-                         dev);
+       err = devm_request_irq(&pdev->dev, IRQ_AMIGA_CIAA_SP, amikbd_interrupt,
+                              0, "amikbd", dev);
        if (err)
-               goto fail2;
+               return err;
 
        err = input_register_device(dev);
        if (err)
-               goto fail3;
+               return err;
 
        platform_set_drvdata(pdev, dev);
 
        return 0;
-
- fail3:        free_irq(IRQ_AMIGA_CIAA_SP, dev);
- fail2:        input_free_device(dev);
-       return err;
-}
-
-static int __exit amikbd_remove(struct platform_device *pdev)
-{
-       struct input_dev *dev = platform_get_drvdata(pdev);
-
-       free_irq(IRQ_AMIGA_CIAA_SP, dev);
-       input_unregister_device(dev);
-       return 0;
 }
 
 static struct platform_driver amikbd_driver = {
-       .remove = __exit_p(amikbd_remove),
        .driver   = {
                .name   = "amiga-keyboard",
        },
index 56a919e..f3c3746 100644 (file)
@@ -307,7 +307,6 @@ static int bcm_kp_probe(struct platform_device *pdev)
 {
        struct bcm_kp *kp;
        struct input_dev *input_dev;
-       struct resource *res;
        int error;
 
        kp = devm_kzalloc(&pdev->dev, sizeof(*kp), GFP_KERNEL);
@@ -353,29 +352,16 @@ static int bcm_kp_probe(struct platform_device *pdev)
                return error;
        }
 
-       /* Get the KEYPAD base address */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "Missing keypad base address resource\n");
-               return -ENODEV;
-       }
-
-       kp->base = devm_ioremap_resource(&pdev->dev, res);
+       kp->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(kp->base))
                return PTR_ERR(kp->base);
 
        /* Enable clock */
-       kp->clk = devm_clk_get(&pdev->dev, "peri_clk");
+       kp->clk = devm_clk_get_optional(&pdev->dev, "peri_clk");
        if (IS_ERR(kp->clk)) {
-               error = PTR_ERR(kp->clk);
-               if (error != -ENOENT) {
-                       if (error != -EPROBE_DEFER)
-                               dev_err(&pdev->dev, "Failed to get clock\n");
-                       return error;
-               }
-               dev_dbg(&pdev->dev,
-                       "No clock specified. Assuming it's enabled\n");
-               kp->clk = NULL;
+               return dev_err_probe(&pdev->dev, PTR_ERR(kp->clk), "Failed to get clock\n");
+       } else if (!kp->clk) {
+               dev_dbg(&pdev->dev, "No clock specified. Assuming it's enabled\n");
        } else {
                unsigned int desired_rate;
                long actual_rate;
index c928829..2e7c2c0 100644 (file)
@@ -523,18 +523,15 @@ static int gpio_keys_setup_key(struct platform_device *pdev,
                                                     NULL, GPIOD_IN, desc);
                if (IS_ERR(bdata->gpiod)) {
                        error = PTR_ERR(bdata->gpiod);
-                       if (error == -ENOENT) {
-                               /*
-                                * GPIO is optional, we may be dealing with
-                                * purely interrupt-driven setup.
-                                */
-                               bdata->gpiod = NULL;
-                       } else {
-                               if (error != -EPROBE_DEFER)
-                                       dev_err(dev, "failed to get gpio: %d\n",
-                                               error);
-                               return error;
-                       }
+                       if (error != -ENOENT)
+                               return dev_err_probe(dev, error,
+                                                    "failed to get gpio\n");
+
+                       /*
+                        * GPIO is optional, we may be dealing with
+                        * purely interrupt-driven setup.
+                        */
+                       bdata->gpiod = NULL;
                }
        } else if (gpio_is_valid(button->gpio)) {
                /*
index c3937d2..ba00ecf 100644 (file)
@@ -299,13 +299,9 @@ static int gpio_keys_polled_probe(struct platform_device *pdev)
                                                             NULL, GPIOD_IN,
                                                             button->desc);
                        if (IS_ERR(bdata->gpiod)) {
-                               error = PTR_ERR(bdata->gpiod);
-                               if (error != -EPROBE_DEFER)
-                                       dev_err(dev,
-                                               "failed to get gpio: %d\n",
-                                               error);
                                fwnode_handle_put(child);
-                               return error;
+                               return dev_err_probe(dev, PTR_ERR(bdata->gpiod),
+                                                    "failed to get gpio\n");
                        }
                } else if (gpio_is_valid(button->gpio)) {
                        /*
index 3964f6e..7bee93e 100644 (file)
@@ -556,6 +556,7 @@ static int init_pwm(struct lm8323_chip *lm, int id, struct device *dev,
                    const char *name)
 {
        struct lm8323_pwm *pwm;
+       int err;
 
        BUG_ON(id > 3);
 
@@ -575,9 +576,11 @@ static int init_pwm(struct lm8323_chip *lm, int id, struct device *dev,
                pwm->cdev.name = name;
                pwm->cdev.brightness_set = lm8323_pwm_set_brightness;
                pwm->cdev.groups = lm8323_pwm_groups;
-               if (led_classdev_register(dev, &pwm->cdev) < 0) {
-                       dev_err(dev, "couldn't register PWM %d\n", id);
-                       return -1;
+
+               err = devm_led_classdev_register(dev, &pwm->cdev);
+               if (err) {
+                       dev_err(dev, "couldn't register PWM %d: %d\n", id, err);
+                       return err;
                }
                pwm->enabled = true;
        }
@@ -585,8 +588,6 @@ static int init_pwm(struct lm8323_chip *lm, int id, struct device *dev,
        return 0;
 }
 
-static struct i2c_driver lm8323_i2c_driver;
-
 static ssize_t lm8323_show_disable(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
@@ -615,6 +616,12 @@ static ssize_t lm8323_set_disable(struct device *dev,
 }
 static DEVICE_ATTR(disable_kp, 0644, lm8323_show_disable, lm8323_set_disable);
 
+static struct attribute *lm8323_attrs[] = {
+       &dev_attr_disable_kp.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(lm8323);
+
 static int lm8323_probe(struct i2c_client *client)
 {
        struct lm8323_platform_data *pdata = dev_get_platdata(&client->dev);
@@ -642,12 +649,13 @@ static int lm8323_probe(struct i2c_client *client)
                return -EINVAL;
        }
 
-       lm = kzalloc(sizeof *lm, GFP_KERNEL);
-       idev = input_allocate_device();
-       if (!lm || !idev) {
-               err = -ENOMEM;
-               goto fail1;
-       }
+       lm = devm_kzalloc(&client->dev, sizeof(*lm), GFP_KERNEL);
+       if (!lm)
+               return -ENOMEM;
+
+       idev = devm_input_allocate_device(&client->dev);
+       if (!idev)
+               return -ENOMEM;
 
        lm->client = client;
        lm->idev = idev;
@@ -663,8 +671,10 @@ static int lm8323_probe(struct i2c_client *client)
 
        lm8323_reset(lm);
 
-       /* Nothing's set up to service the IRQ yet, so just spin for max.
-        * 100ms until we can configure. */
+       /*
+        * Nothing's set up to service the IRQ yet, so just spin for max.
+        * 100ms until we can configure.
+        */
        tmo = jiffies + msecs_to_jiffies(100);
        while (lm8323_read(lm, LM8323_CMD_READ_INT, data, 1) == 1) {
                if (data[0] & INT_NOINIT)
@@ -684,21 +694,17 @@ static int lm8323_probe(struct i2c_client *client)
        /* If a true probe check the device */
        if (lm8323_read_id(lm, data) != 0) {
                dev_err(&client->dev, "device not found\n");
-               err = -ENODEV;
-               goto fail1;
+               return -ENODEV;
        }
 
        for (pwm = 0; pwm < LM8323_NUM_PWMS; pwm++) {
                err = init_pwm(lm, pwm + 1, &client->dev,
                               pdata->pwm_names[pwm]);
-               if (err < 0)
-                       goto fail2;
+               if (err)
+                       return err;
        }
 
        lm->kp_enabled = true;
-       err = device_create_file(&client->dev, &dev_attr_disable_kp);
-       if (err < 0)
-               goto fail2;
 
        idev->name = pdata->name ? : "LM8323 keypad";
        snprintf(lm->phys, sizeof(lm->phys),
@@ -719,14 +725,16 @@ static int lm8323_probe(struct i2c_client *client)
        err = input_register_device(idev);
        if (err) {
                dev_dbg(&client->dev, "error registering input device\n");
-               goto fail3;
+               return err;
        }
 
-       err = request_threaded_irq(client->irq, NULL, lm8323_irq,
-                         IRQF_TRIGGER_LOW|IRQF_ONESHOT, "lm8323", lm);
+       err = devm_request_threaded_irq(&client->dev, client->irq,
+                                       NULL, lm8323_irq,
+                                       IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+                                       "lm8323", lm);
        if (err) {
                dev_err(&client->dev, "could not get IRQ %d\n", client->irq);
-               goto fail4;
+               return err;
        }
 
        i2c_set_clientdata(client, lm);
@@ -735,39 +743,6 @@ static int lm8323_probe(struct i2c_client *client)
        enable_irq_wake(client->irq);
 
        return 0;
-
-fail4:
-       input_unregister_device(idev);
-       idev = NULL;
-fail3:
-       device_remove_file(&client->dev, &dev_attr_disable_kp);
-fail2:
-       while (--pwm >= 0)
-               if (lm->pwm[pwm].enabled)
-                       led_classdev_unregister(&lm->pwm[pwm].cdev);
-fail1:
-       input_free_device(idev);
-       kfree(lm);
-       return err;
-}
-
-static void lm8323_remove(struct i2c_client *client)
-{
-       struct lm8323_chip *lm = i2c_get_clientdata(client);
-       int i;
-
-       disable_irq_wake(client->irq);
-       free_irq(client->irq, lm);
-
-       input_unregister_device(lm->idev);
-
-       device_remove_file(&lm->client->dev, &dev_attr_disable_kp);
-
-       for (i = 0; i < 3; i++)
-               if (lm->pwm[i].enabled)
-                       led_classdev_unregister(&lm->pwm[i].cdev);
-
-       kfree(lm);
 }
 
 /*
@@ -823,11 +798,11 @@ static const struct i2c_device_id lm8323_id[] = {
 
 static struct i2c_driver lm8323_i2c_driver = {
        .driver = {
-               .name   = "lm8323",
-               .pm     = pm_sleep_ptr(&lm8323_pm_ops),
+               .name           = "lm8323",
+               .pm             = pm_sleep_ptr(&lm8323_pm_ops),
+               .dev_groups     = lm8323_groups,
        },
        .probe          = lm8323_probe,
-       .remove         = lm8323_remove,
        .id_table       = lm8323_id,
 };
 MODULE_DEVICE_TABLE(i2c, lm8323_id);
index c9f0576..1c070c4 100644 (file)
@@ -142,18 +142,18 @@ static int lm8333_probe(struct i2c_client *client)
                return -EINVAL;
        }
 
-       lm8333 = kzalloc(sizeof(*lm8333), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!lm8333 || !input) {
-               err = -ENOMEM;
-               goto free_mem;
-       }
+       lm8333 = devm_kzalloc(&client->dev, sizeof(*lm8333), GFP_KERNEL);
+       if (!lm8333)
+               return -ENOMEM;
+
+       input = devm_input_allocate_device(&client->dev);
+       if (!input)
+               return -ENOMEM;
 
        lm8333->client = client;
        lm8333->input = input;
 
        input->name = client->name;
-       input->dev.parent = &client->dev;
        input->id.bustype = BUS_I2C;
 
        input_set_capability(input, EV_MSC, MSC_SCAN);
@@ -162,7 +162,7 @@ static int lm8333_probe(struct i2c_client *client)
                                         LM8333_NUM_ROWS, LM8333_NUM_COLS,
                                         lm8333->keycodes, input);
        if (err)
-               goto free_mem;
+               return err;
 
        if (pdata->debounce_time) {
                err = lm8333_write8(lm8333, LM8333_DEBOUNCE,
@@ -178,34 +178,19 @@ static int lm8333_probe(struct i2c_client *client)
                        dev_warn(&client->dev, "Unable to set active time\n");
        }
 
-       err = request_threaded_irq(client->irq, NULL, lm8333_irq_thread,
-                                  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-                                  "lm8333", lm8333);
+       err = devm_request_threaded_irq(&client->dev, client->irq,
+                                       NULL, lm8333_irq_thread,
+                                       IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+                                       "lm8333", lm8333);
        if (err)
-               goto free_mem;
+               return err;
 
        err = input_register_device(input);
        if (err)
-               goto free_irq;
+               return err;
 
        i2c_set_clientdata(client, lm8333);
        return 0;
-
- free_irq:
-       free_irq(client->irq, lm8333);
- free_mem:
-       input_free_device(input);
-       kfree(lm8333);
-       return err;
-}
-
-static void lm8333_remove(struct i2c_client *client)
-{
-       struct lm8333 *lm8333 = i2c_get_clientdata(client);
-
-       free_irq(client->irq, lm8333);
-       input_unregister_device(lm8333->input);
-       kfree(lm8333);
 }
 
 static const struct i2c_device_id lm8333_id[] = {
@@ -219,7 +204,6 @@ static struct i2c_driver lm8333_driver = {
                .name           = "lm8333",
        },
        .probe          = lm8333_probe,
-       .remove         = lm8333_remove,
        .id_table       = lm8333_id,
 };
 module_i2c_driver(lm8333_driver);
index 911e118..322a878 100644 (file)
@@ -160,17 +160,10 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev)
 {
        struct lpc32xx_kscan_drv *kscandat;
        struct input_dev *input;
-       struct resource *res;
        size_t keymap_size;
        int error;
        int irq;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "failed to get platform I/O memory\n");
-               return -EINVAL;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return -EINVAL;
@@ -221,7 +214,7 @@ static int lpc32xx_kscan_probe(struct platform_device *pdev)
 
        input_set_drvdata(kscandat->input, kscandat);
 
-       kscandat->kscan_base = devm_ioremap_resource(&pdev->dev, res);
+       kscandat->kscan_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(kscandat->kscan_base))
                return PTR_ERR(kscandat->kscan_base);
 
index de312d8..2410f67 100644 (file)
@@ -92,6 +92,13 @@ static irqreturn_t mcs_touchkey_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static void mcs_touchkey_poweroff(void *data)
+{
+       struct mcs_touchkey_data *touchkey = data;
+
+       touchkey->poweron(false);
+}
+
 static int mcs_touchkey_probe(struct i2c_client *client)
 {
        const struct i2c_device_id *id = i2c_client_get_device_id(client);
@@ -109,13 +116,16 @@ static int mcs_touchkey_probe(struct i2c_client *client)
                return -EINVAL;
        }
 
-       data = kzalloc(struct_size(data, keycodes, pdata->key_maxval + 1),
-                      GFP_KERNEL);
-       input_dev = input_allocate_device();
-       if (!data || !input_dev) {
-               dev_err(&client->dev, "Failed to allocate memory\n");
-               error = -ENOMEM;
-               goto err_free_mem;
+       data = devm_kzalloc(&client->dev,
+                           struct_size(data, keycodes, pdata->key_maxval + 1),
+                           GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       input_dev = devm_input_allocate_device(&client->dev);
+       if (!input_dev) {
+               dev_err(&client->dev, "Failed to allocate input device\n");
+               return -ENOMEM;
        }
 
        data->client = client;
@@ -136,15 +146,13 @@ static int mcs_touchkey_probe(struct i2c_client *client)
 
        fw_ver = i2c_smbus_read_byte_data(client, fw_reg);
        if (fw_ver < 0) {
-               error = fw_ver;
-               dev_err(&client->dev, "i2c read error[%d]\n", error);
-               goto err_free_mem;
+               dev_err(&client->dev, "i2c read error[%d]\n", fw_ver);
+               return fw_ver;
        }
        dev_info(&client->dev, "Firmware version: %d\n", fw_ver);
 
        input_dev->name = "MELFAS MCS Touchkey";
        input_dev->id.bustype = BUS_I2C;
-       input_dev->dev.parent = &client->dev;
        input_dev->evbit[0] = BIT_MASK(EV_KEY);
        if (!pdata->no_autorepeat)
                input_dev->evbit[0] |= BIT_MASK(EV_REP);
@@ -169,40 +177,28 @@ static int mcs_touchkey_probe(struct i2c_client *client)
        if (pdata->poweron) {
                data->poweron = pdata->poweron;
                data->poweron(true);
+
+               error = devm_add_action_or_reset(&client->dev,
+                                                mcs_touchkey_poweroff, data);
+               if (error)
+                       return error;
        }
 
-       error = request_threaded_irq(client->irq, NULL, mcs_touchkey_interrupt,
-                                    IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-                                    client->dev.driver->name, data);
+       error = devm_request_threaded_irq(&client->dev, client->irq,
+                                         NULL, mcs_touchkey_interrupt,
+                                         IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+                                         client->dev.driver->name, data);
        if (error) {
                dev_err(&client->dev, "Failed to register interrupt\n");
-               goto err_free_mem;
+               return error;
        }
 
        error = input_register_device(input_dev);
        if (error)
-               goto err_free_irq;
+               return error;
 
        i2c_set_clientdata(client, data);
        return 0;
-
-err_free_irq:
-       free_irq(client->irq, data);
-err_free_mem:
-       input_free_device(input_dev);
-       kfree(data);
-       return error;
-}
-
-static void mcs_touchkey_remove(struct i2c_client *client)
-{
-       struct mcs_touchkey_data *data = i2c_get_clientdata(client);
-
-       free_irq(client->irq, data);
-       if (data->poweron)
-               data->poweron(false);
-       input_unregister_device(data->input_dev);
-       kfree(data);
 }
 
 static void mcs_touchkey_shutdown(struct i2c_client *client)
@@ -259,7 +255,6 @@ static struct i2c_driver mcs_touchkey_driver = {
                .pm     = pm_sleep_ptr(&mcs_touchkey_pm_ops),
        },
        .probe          = mcs_touchkey_probe,
-       .remove         = mcs_touchkey_remove,
        .shutdown       = mcs_touchkey_shutdown,
        .id_table       = mcs_touchkey_id,
 };
index 970f2a6..b3ccc97 100644 (file)
@@ -221,13 +221,20 @@ static irqreturn_t ske_keypad_irq(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static void ske_keypad_board_exit(void *data)
+{
+       struct ske_keypad *keypad = data;
+
+       keypad->board->exit();
+}
+
 static int __init ske_keypad_probe(struct platform_device *pdev)
 {
        const struct ske_keypad_platform_data *plat =
                        dev_get_platdata(&pdev->dev);
+       struct device *dev = &pdev->dev;
        struct ske_keypad *keypad;
        struct input_dev *input;
-       struct resource *res;
        int irq;
        int error;
 
@@ -238,20 +245,14 @@ static int __init ske_keypad_probe(struct platform_device *pdev)
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
-               return -EINVAL;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "missing platform resources\n");
-               return -EINVAL;
-       }
+               return irq;
 
-       keypad = kzalloc(sizeof(struct ske_keypad), GFP_KERNEL);
-       input = input_allocate_device();
+       keypad = devm_kzalloc(dev, sizeof(struct ske_keypad),
+                             GFP_KERNEL);
+       input = devm_input_allocate_device(dev);
        if (!keypad || !input) {
                dev_err(&pdev->dev, "failed to allocate keypad memory\n");
-               error = -ENOMEM;
-               goto err_free_mem;
+               return -ENOMEM;
        }
 
        keypad->irq = irq;
@@ -259,31 +260,20 @@ static int __init ske_keypad_probe(struct platform_device *pdev)
        keypad->input = input;
        spin_lock_init(&keypad->ske_keypad_lock);
 
-       if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
-               dev_err(&pdev->dev, "failed to request I/O memory\n");
-               error = -EBUSY;
-               goto err_free_mem;
-       }
-
-       keypad->reg_base = ioremap(res->start, resource_size(res));
-       if (!keypad->reg_base) {
-               dev_err(&pdev->dev, "failed to remap I/O memory\n");
-               error = -ENXIO;
-               goto err_free_mem_region;
-       }
+       keypad->reg_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(keypad->reg_base))
+               return PTR_ERR(keypad->reg_base);
 
-       keypad->pclk = clk_get(&pdev->dev, "apb_pclk");
+       keypad->pclk = devm_clk_get_enabled(dev, "apb_pclk");
        if (IS_ERR(keypad->pclk)) {
                dev_err(&pdev->dev, "failed to get pclk\n");
-               error = PTR_ERR(keypad->pclk);
-               goto err_iounmap;
+               return PTR_ERR(keypad->pclk);
        }
 
-       keypad->clk = clk_get(&pdev->dev, NULL);
+       keypad->clk = devm_clk_get_enabled(dev, NULL);
        if (IS_ERR(keypad->clk)) {
                dev_err(&pdev->dev, "failed to get clk\n");
-               error = PTR_ERR(keypad->clk);
-               goto err_pclk;
+               return PTR_ERR(keypad->clk);
        }
 
        input->id.bustype = BUS_HOST;
@@ -295,48 +285,43 @@ static int __init ske_keypad_probe(struct platform_device *pdev)
                                           keypad->keymap, input);
        if (error) {
                dev_err(&pdev->dev, "Failed to build keymap\n");
-               goto err_clk;
+               return error;
        }
 
        input_set_capability(input, EV_MSC, MSC_SCAN);
        if (!plat->no_autorepeat)
                __set_bit(EV_REP, input->evbit);
 
-       error = clk_prepare_enable(keypad->pclk);
-       if (error) {
-               dev_err(&pdev->dev, "Failed to prepare/enable pclk\n");
-               goto err_clk;
-       }
-
-       error = clk_prepare_enable(keypad->clk);
-       if (error) {
-               dev_err(&pdev->dev, "Failed to prepare/enable clk\n");
-               goto err_pclk_disable;
-       }
-
-
        /* go through board initialization helpers */
        if (keypad->board->init)
                keypad->board->init();
 
+       if (keypad->board->exit) {
+               error = devm_add_action_or_reset(dev, ske_keypad_board_exit,
+                                                keypad);
+               if (error)
+                       return error;
+       }
+
        error = ske_keypad_chip_init(keypad);
        if (error) {
                dev_err(&pdev->dev, "unable to init keypad hardware\n");
-               goto err_clk_disable;
+               return error;
        }
 
-       error = request_threaded_irq(keypad->irq, NULL, ske_keypad_irq,
-                                    IRQF_ONESHOT, "ske-keypad", keypad);
+       error = devm_request_threaded_irq(dev, keypad->irq,
+                                         NULL, ske_keypad_irq,
+                                         IRQF_ONESHOT, "ske-keypad", keypad);
        if (error) {
                dev_err(&pdev->dev, "allocate irq %d failed\n", keypad->irq);
-               goto err_clk_disable;
+               return error;
        }
 
        error = input_register_device(input);
        if (error) {
                dev_err(&pdev->dev,
-                               "unable to register input device: %d\n", error);
-               goto err_free_irq;
+                       "unable to register input device: %d\n", error);
+               return error;
        }
 
        if (plat->wakeup_enable)
@@ -345,47 +330,6 @@ static int __init ske_keypad_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, keypad);
 
        return 0;
-
-err_free_irq:
-       free_irq(keypad->irq, keypad);
-err_clk_disable:
-       clk_disable_unprepare(keypad->clk);
-err_pclk_disable:
-       clk_disable_unprepare(keypad->pclk);
-err_clk:
-       clk_put(keypad->clk);
-err_pclk:
-       clk_put(keypad->pclk);
-err_iounmap:
-       iounmap(keypad->reg_base);
-err_free_mem_region:
-       release_mem_region(res->start, resource_size(res));
-err_free_mem:
-       input_free_device(input);
-       kfree(keypad);
-       return error;
-}
-
-static int ske_keypad_remove(struct platform_device *pdev)
-{
-       struct ske_keypad *keypad = platform_get_drvdata(pdev);
-       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-       free_irq(keypad->irq, keypad);
-
-       input_unregister_device(keypad->input);
-
-       clk_disable_unprepare(keypad->clk);
-       clk_put(keypad->clk);
-
-       if (keypad->board->exit)
-               keypad->board->exit();
-
-       iounmap(keypad->reg_base);
-       release_mem_region(res->start, resource_size(res));
-       kfree(keypad);
-
-       return 0;
 }
 
 static int ske_keypad_suspend(struct device *dev)
@@ -424,7 +368,6 @@ static struct platform_driver ske_keypad_driver = {
                .name = "nmk-ske-keypad",
                .pm = pm_sleep_ptr(&ske_keypad_dev_pm_ops),
        },
-       .remove = ske_keypad_remove,
 };
 
 module_platform_driver_probe(ske_keypad_driver, ske_keypad_probe);
index e9fa142..096c18d 100644 (file)
@@ -186,8 +186,7 @@ static int nspire_keypad_probe(struct platform_device *pdev)
                return PTR_ERR(keypad->clk);
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       keypad->reg_base = devm_ioremap_resource(&pdev->dev, res);
+       keypad->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(keypad->reg_base))
                return PTR_ERR(keypad->reg_base);
 
index 9f085d5..773e55e 100644 (file)
@@ -341,17 +341,10 @@ static int omap4_keypad_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct omap4_keypad *keypad_data;
        struct input_dev *input_dev;
-       struct resource *res;
        unsigned int max_keys;
        int irq;
        int error;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "no base address specified\n");
-               return -EINVAL;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;
@@ -370,7 +363,7 @@ static int omap4_keypad_probe(struct platform_device *pdev)
        if (error)
                return error;
 
-       keypad_data->base = devm_ioremap_resource(dev, res);
+       keypad_data->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(keypad_data->base))
                return PTR_ERR(keypad_data->base);
 
index b0ea387..7ffe1a7 100644 (file)
@@ -39,15 +39,8 @@ static int opencores_kbd_probe(struct platform_device *pdev)
 {
        struct input_dev *input;
        struct opencores_kbd *opencores_kbd;
-       struct resource *res;
        int irq, i, error;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "missing board memory resource\n");
-               return -EINVAL;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return -EINVAL;
@@ -65,7 +58,7 @@ static int opencores_kbd_probe(struct platform_device *pdev)
 
        opencores_kbd->input = input;
 
-       opencores_kbd->addr = devm_ioremap_resource(&pdev->dev, res);
+       opencores_kbd->addr = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(opencores_kbd->addr))
                return PTR_ERR(opencores_kbd->addr);
 
index 038ff35..147b1f2 100644 (file)
@@ -318,40 +318,22 @@ static void ppkb_close(struct input_dev *input)
        ppkb_set_scan(client, false);
 }
 
-static void ppkb_regulator_disable(void *regulator)
-{
-       regulator_disable(regulator);
-}
-
 static int ppkb_probe(struct i2c_client *client)
 {
        struct device *dev = &client->dev;
        unsigned int phys_rows, phys_cols;
        struct pinephone_keyboard *ppkb;
-       struct regulator *vbat_supply;
        u8 info[PPKB_MATRIX_SIZE + 1];
        struct device_node *i2c_bus;
        int ret;
        int error;
 
-       vbat_supply = devm_regulator_get(dev, "vbat");
-       error = PTR_ERR_OR_ZERO(vbat_supply);
+       error = devm_regulator_get_enable(dev, "vbat");
        if (error) {
                dev_err(dev, "Failed to get VBAT supply: %d\n", error);
                return error;
        }
 
-       error = regulator_enable(vbat_supply);
-       if (error) {
-               dev_err(dev, "Failed to enable VBAT: %d\n", error);
-               return error;
-       }
-
-       error = devm_add_action_or_reset(dev, ppkb_regulator_disable,
-                                        vbat_supply);
-       if (error)
-               return error;
-
        ret = i2c_smbus_read_i2c_block_data(client, 0, sizeof(info), info);
        if (ret != sizeof(info)) {
                error = ret < 0 ? ret : -EIO;
index 871f858..3724363 100644 (file)
@@ -717,7 +717,6 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
        struct device_node *np = pdev->dev.of_node;
        struct pxa27x_keypad *keypad;
        struct input_dev *input_dev;
-       struct resource *res;
        int irq, error;
 
        /* Driver need build keycode from device tree or pdata */
@@ -728,12 +727,6 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
        if (irq < 0)
                return -ENXIO;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (res == NULL) {
-               dev_err(&pdev->dev, "failed to get I/O memory\n");
-               return -ENXIO;
-       }
-
        keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad),
                              GFP_KERNEL);
        if (!keypad)
@@ -747,7 +740,7 @@ static int pxa27x_keypad_probe(struct platform_device *pdev)
        keypad->input_dev = input_dev;
        keypad->irq = irq;
 
-       keypad->mmio_base = devm_ioremap_resource(&pdev->dev, res);
+       keypad->mmio_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(keypad->mmio_base))
                return PTR_ERR(keypad->mmio_base);
 
index 91aaa9f..9b093b0 100644 (file)
@@ -149,20 +149,20 @@ static int qt1070_probe(struct i2c_client *client)
        if (!qt1070_identify(client))
                return -ENODEV;
 
-       data = kzalloc(sizeof(struct qt1070_data), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!data || !input) {
-               dev_err(&client->dev, "insufficient memory\n");
-               err = -ENOMEM;
-               goto err_free_mem;
-       }
+       data = devm_kzalloc(&client->dev, sizeof(struct qt1070_data),
+                           GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       input = devm_input_allocate_device(&client->dev);
+       if (!input)
+               return -ENOMEM;
 
        data->client = client;
        data->input = input;
        data->irq = client->irq;
 
        input->name = "AT42QT1070 QTouch Sensor";
-       input->dev.parent = &client->dev;
        input->id.bustype = BUS_I2C;
 
        /* Add the keycode */
@@ -185,19 +185,20 @@ static int qt1070_probe(struct i2c_client *client)
        qt1070_write(client, RESET, 1);
        msleep(QT1070_RESET_TIME);
 
-       err = request_threaded_irq(client->irq, NULL, qt1070_interrupt,
-                                  IRQF_TRIGGER_NONE | IRQF_ONESHOT,
-                                  client->dev.driver->name, data);
+       err = devm_request_threaded_irq(&client->dev, client->irq,
+                                       NULL, qt1070_interrupt,
+                                       IRQF_TRIGGER_NONE | IRQF_ONESHOT,
+                                       client->dev.driver->name, data);
        if (err) {
                dev_err(&client->dev, "fail to request irq\n");
-               goto err_free_mem;
+               return err;
        }
 
        /* Register the input device */
        err = input_register_device(data->input);
        if (err) {
                dev_err(&client->dev, "Failed to register input device\n");
-               goto err_free_irq;
+               return err;
        }
 
        i2c_set_clientdata(client, data);
@@ -206,24 +207,6 @@ static int qt1070_probe(struct i2c_client *client)
        qt1070_read(client, DET_STATUS);
 
        return 0;
-
-err_free_irq:
-       free_irq(client->irq, data);
-err_free_mem:
-       input_free_device(input);
-       kfree(data);
-       return err;
-}
-
-static void qt1070_remove(struct i2c_client *client)
-{
-       struct qt1070_data *data = i2c_get_clientdata(client);
-
-       /* Release IRQ */
-       free_irq(client->irq, data);
-
-       input_unregister_device(data->input);
-       kfree(data);
 }
 
 static int qt1070_suspend(struct device *dev)
@@ -272,7 +255,6 @@ static struct i2c_driver qt1070_driver = {
        },
        .id_table       = qt1070_id,
        .probe          = qt1070_probe,
-       .remove         = qt1070_remove,
 };
 
 module_i2c_driver(qt1070_driver);
index 599ea85..7e3b096 100644 (file)
@@ -32,7 +32,7 @@
 
 #define QT2160_NUM_LEDS_X      8
 
-#define QT2160_CYCLE_INTERVAL  (2*HZ)
+#define QT2160_CYCLE_INTERVAL  2000 /* msec - 2 sec */
 
 static unsigned char qt2160_key2code[] = {
        KEY_0, KEY_1, KEY_2, KEY_3,
@@ -54,7 +54,6 @@ struct qt2160_led {
 struct qt2160_data {
        struct i2c_client *client;
        struct input_dev *input;
-       struct delayed_work dwork;
        unsigned short keycodes[ARRAY_SIZE(qt2160_key2code)];
        u16 key_matrix;
 #ifdef CONFIG_LEDS_CLASS
@@ -155,10 +154,10 @@ static int qt2160_read_block(struct i2c_client *client,
        return 0;
 }
 
-static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
+static void qt2160_get_key_matrix(struct input_dev *input)
 {
+       struct qt2160_data *qt2160 = input_get_drvdata(input);
        struct i2c_client *client = qt2160->client;
-       struct input_dev *input = qt2160->input;
        u8 regs[6];
        u16 old_matrix, new_matrix;
        int ret, i, mask;
@@ -173,7 +172,7 @@ static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
        if (ret) {
                dev_err(&client->dev,
                        "could not perform chip read.\n");
-               return ret;
+               return;
        }
 
        old_matrix = qt2160->key_matrix;
@@ -191,37 +190,17 @@ static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
        }
 
        input_sync(input);
-
-       return 0;
 }
 
-static irqreturn_t qt2160_irq(int irq, void *_qt2160)
+static irqreturn_t qt2160_irq(int irq, void *data)
 {
-       struct qt2160_data *qt2160 = _qt2160;
+       struct input_dev *input = data;
 
-       mod_delayed_work(system_wq, &qt2160->dwork, 0);
+       qt2160_get_key_matrix(input);
 
        return IRQ_HANDLED;
 }
 
-static void qt2160_schedule_read(struct qt2160_data *qt2160)
-{
-       schedule_delayed_work(&qt2160->dwork, QT2160_CYCLE_INTERVAL);
-}
-
-static void qt2160_worker(struct work_struct *work)
-{
-       struct qt2160_data *qt2160 =
-               container_of(work, struct qt2160_data, dwork.work);
-
-       dev_dbg(&qt2160->client->dev, "worker\n");
-
-       qt2160_get_key_matrix(qt2160);
-
-       /* Avoid device lock up by checking every so often */
-       qt2160_schedule_read(qt2160);
-}
-
 static int qt2160_read(struct i2c_client *client, u8 reg)
 {
        int ret;
@@ -260,7 +239,7 @@ static int qt2160_write(struct i2c_client *client, u8 reg, u8 data)
 static int qt2160_register_leds(struct qt2160_data *qt2160)
 {
        struct i2c_client *client = qt2160->client;
-       int ret;
+       int error;
        int i;
 
        for (i = 0; i < QT2160_NUM_LEDS_X; i++) {
@@ -273,9 +252,9 @@ static int qt2160_register_leds(struct qt2160_data *qt2160)
                led->id = i;
                led->qt2160 = qt2160;
 
-               ret = led_classdev_register(&client->dev, &led->cdev);
-               if (ret < 0)
-                       return ret;
+               error = devm_led_classdev_register(&client->dev, &led->cdev);
+               if (error)
+                       return error;
        }
 
        /* Tur off LEDs */
@@ -286,14 +265,6 @@ static int qt2160_register_leds(struct qt2160_data *qt2160)
        return 0;
 }
 
-static void qt2160_unregister_leds(struct qt2160_data *qt2160)
-{
-       int i;
-
-       for (i = 0; i < QT2160_NUM_LEDS_X; i++)
-               led_classdev_unregister(&qt2160->leds[i].cdev);
-}
-
 #else
 
 static inline int qt2160_register_leds(struct qt2160_data *qt2160)
@@ -301,10 +272,6 @@ static inline int qt2160_register_leds(struct qt2160_data *qt2160)
        return 0;
 }
 
-static inline void qt2160_unregister_leds(struct qt2160_data *qt2160)
-{
-}
-
 #endif
 
 static bool qt2160_identify(struct i2c_client *client)
@@ -345,12 +312,9 @@ static int qt2160_probe(struct i2c_client *client)
        int i;
        int error;
 
-       /* Check functionality */
-       error = i2c_check_functionality(client->adapter,
-                       I2C_FUNC_SMBUS_BYTE);
-       if (!error) {
+       if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) {
                dev_err(&client->dev, "%s adapter not supported\n",
-                               dev_driver_string(&client->adapter->dev));
+                       dev_driver_string(&client->adapter->dev));
                return -ENODEV;
        }
 
@@ -358,17 +322,16 @@ static int qt2160_probe(struct i2c_client *client)
                return -ENODEV;
 
        /* Chip is valid and active. Allocate structure */
-       qt2160 = kzalloc(sizeof(struct qt2160_data), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!qt2160 || !input) {
-               dev_err(&client->dev, "insufficient memory\n");
-               error = -ENOMEM;
-               goto err_free_mem;
-       }
+       qt2160 = devm_kzalloc(&client->dev, sizeof(*qt2160), GFP_KERNEL);
+       if (!qt2160)
+               return -ENOMEM;
+
+       input = devm_input_allocate_device(&client->dev);
+       if (!input)
+               return -ENOMEM;
 
        qt2160->client = client;
        qt2160->input = input;
-       INIT_DELAYED_WORK(&qt2160->dwork, qt2160_worker);
 
        input->name = "AT42QT2160 Touch Sense Keyboard";
        input->id.bustype = BUS_I2C;
@@ -385,66 +348,48 @@ static int qt2160_probe(struct i2c_client *client)
        }
        __clear_bit(KEY_RESERVED, input->keybit);
 
+       input_set_drvdata(input, qt2160);
+
        /* Calibrate device */
        error = qt2160_write(client, QT2160_CMD_CALIBRATE, 1);
        if (error) {
                dev_err(&client->dev, "failed to calibrate device\n");
-               goto err_free_mem;
+               return error;
        }
 
        if (client->irq) {
-               error = request_irq(client->irq, qt2160_irq,
-                                   IRQF_TRIGGER_FALLING, "qt2160", qt2160);
+               error = devm_request_threaded_irq(&client->dev, client->irq,
+                                                 NULL, qt2160_irq,
+                                                 IRQF_ONESHOT,
+                                                 "qt2160", input);
                if (error) {
                        dev_err(&client->dev,
                                "failed to allocate irq %d\n", client->irq);
-                       goto err_free_mem;
+                       return error;
+               }
+       } else {
+               error = input_setup_polling(input, qt2160_get_key_matrix);
+               if (error) {
+                       dev_err(&client->dev, "Failed to setup polling\n");
+                       return error;
                }
+               input_set_poll_interval(input, QT2160_CYCLE_INTERVAL);
        }
 
        error = qt2160_register_leds(qt2160);
        if (error) {
                dev_err(&client->dev, "Failed to register leds\n");
-               goto err_free_irq;
+               return error;
        }
 
        error = input_register_device(qt2160->input);
        if (error) {
                dev_err(&client->dev,
                        "Failed to register input device\n");
-               goto err_unregister_leds;
+               return error;
        }
 
-       i2c_set_clientdata(client, qt2160);
-       qt2160_schedule_read(qt2160);
-
        return 0;
-
-err_unregister_leds:
-       qt2160_unregister_leds(qt2160);
-err_free_irq:
-       if (client->irq)
-               free_irq(client->irq, qt2160);
-err_free_mem:
-       input_free_device(input);
-       kfree(qt2160);
-       return error;
-}
-
-static void qt2160_remove(struct i2c_client *client)
-{
-       struct qt2160_data *qt2160 = i2c_get_clientdata(client);
-
-       qt2160_unregister_leds(qt2160);
-
-       /* Release IRQ so no queue will be scheduled */
-       if (client->irq)
-               free_irq(client->irq, qt2160);
-
-       cancel_delayed_work_sync(&qt2160->dwork);
-
-       input_unregister_device(qt2160->input);
-       kfree(qt2160);
 }
 
 static const struct i2c_device_id qt2160_idtable[] = {
@@ -461,7 +406,6 @@ static struct i2c_driver qt2160_driver = {
 
        .id_table       = qt2160_idtable,
        .probe          = qt2160_probe,
-       .remove         = qt2160_remove,
 };
 
 module_i2c_driver(qt2160_driver);
index 15c15c0..f304cab 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/pm_wakeup.h>
+#include <linux/property.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
@@ -307,8 +308,7 @@ static int sun4i_lradc_probe(struct platform_device *pdev)
 
        input_set_drvdata(lradc->input, lradc);
 
-       lradc->base = devm_ioremap_resource(dev,
-                             platform_get_resource(pdev, IORESOURCE_MEM, 0));
+       lradc->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(lradc->base))
                return PTR_ERR(lradc->base);
 
index 2f745ca..8af59ce 100644 (file)
@@ -24,6 +24,8 @@
 #define TCA6416_INVERT         2
 #define TCA6416_DIRECTION      3
 
+#define TCA6416_POLL_INTERVAL  100 /* msec */
+
 static const struct i2c_device_id tca6416_id[] = {
        { "tca6416-keys", 16, },
        { "tca6408-keys", 8, },
@@ -43,7 +45,6 @@ struct tca6416_keypad_chip {
 
        struct i2c_client *client;
        struct input_dev *input;
-       struct delayed_work dwork;
        int io_size;
        int irqnum;
        u16 pinmask;
@@ -85,9 +86,9 @@ static int tca6416_read_reg(struct tca6416_keypad_chip *chip, int reg, u16 *val)
        return 0;
 }
 
-static void tca6416_keys_scan(struct tca6416_keypad_chip *chip)
+static void tca6416_keys_scan(struct input_dev *input)
 {
-       struct input_dev *input = chip->input;
+       struct tca6416_keypad_chip *chip = input_get_drvdata(input);
        u16 reg_val, val;
        int error, i, pin_index;
 
@@ -122,33 +123,20 @@ static void tca6416_keys_scan(struct tca6416_keypad_chip *chip)
  */
 static irqreturn_t tca6416_keys_isr(int irq, void *dev_id)
 {
-       struct tca6416_keypad_chip *chip = dev_id;
-
-       tca6416_keys_scan(chip);
+       tca6416_keys_scan(dev_id);
 
        return IRQ_HANDLED;
 }
 
-static void tca6416_keys_work_func(struct work_struct *work)
-{
-       struct tca6416_keypad_chip *chip =
-               container_of(work, struct tca6416_keypad_chip, dwork.work);
-
-       tca6416_keys_scan(chip);
-       schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
-}
-
 static int tca6416_keys_open(struct input_dev *dev)
 {
        struct tca6416_keypad_chip *chip = input_get_drvdata(dev);
 
-       /* Get initial device state in case it has switches */
-       tca6416_keys_scan(chip);
-
-       if (chip->use_polling)
-               schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
-       else
-               enable_irq(chip->irqnum);
+       if (!chip->use_polling) {
+               /* Get initial device state in case it has switches */
+               tca6416_keys_scan(dev);
+               enable_irq(chip->client->irq);
+       }
 
        return 0;
 }
@@ -157,10 +145,8 @@ static void tca6416_keys_close(struct input_dev *dev)
 {
        struct tca6416_keypad_chip *chip = input_get_drvdata(dev);
 
-       if (chip->use_polling)
-               cancel_delayed_work_sync(&chip->dwork);
-       else
-               disable_irq(chip->irqnum);
+       if (!chip->use_polling)
+               disable_irq(chip->client->irq);
 }
 
 static int tca6416_setup_registers(struct tca6416_keypad_chip *chip)
@@ -216,12 +202,15 @@ static int tca6416_keypad_probe(struct i2c_client *client)
                return -EINVAL;
        }
 
-       chip = kzalloc(struct_size(chip, buttons, pdata->nbuttons), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!chip || !input) {
-               error = -ENOMEM;
-               goto fail1;
-       }
+       chip = devm_kzalloc(&client->dev,
+                           struct_size(chip, buttons, pdata->nbuttons),
+                           GFP_KERNEL);
+       if (!chip)
+               return -ENOMEM;
+
+       input = devm_input_allocate_device(&client->dev);
+       if (!input)
+               return -ENOMEM;
 
        chip->client = client;
        chip->input = input;
@@ -229,11 +218,8 @@ static int tca6416_keypad_probe(struct i2c_client *client)
        chip->pinmask = pdata->pinmask;
        chip->use_polling = pdata->use_polling;
 
-       INIT_DELAYED_WORK(&chip->dwork, tca6416_keys_work_func);
-
        input->phys = "tca6416-keys/input0";
        input->name = client->name;
-       input->dev.parent = &client->dev;
 
        input->open = tca6416_keys_open;
        input->close = tca6416_keys_close;
@@ -263,24 +249,28 @@ static int tca6416_keypad_probe(struct i2c_client *client)
         */
        error = tca6416_setup_registers(chip);
        if (error)
-               goto fail1;
+               return error;
 
-       if (!chip->use_polling) {
-               if (pdata->irq_is_gpio)
-                       chip->irqnum = gpio_to_irq(client->irq);
-               else
-                       chip->irqnum = client->irq;
-
-               error = request_threaded_irq(chip->irqnum, NULL,
-                                            tca6416_keys_isr,
-                                            IRQF_TRIGGER_FALLING |
-                                            IRQF_ONESHOT | IRQF_NO_AUTOEN,
-                                            "tca6416-keypad", chip);
+       if (chip->use_polling) {
+               error = input_setup_polling(input, tca6416_keys_scan);
+               if (error) {
+                       dev_err(&client->dev, "Failed to setup polling\n");
+                       return error;
+               }
+
+               input_set_poll_interval(input, TCA6416_POLL_INTERVAL);
+       } else {
+               error = devm_request_threaded_irq(&client->dev, client->irq,
+                                                 NULL, tca6416_keys_isr,
+                                                 IRQF_TRIGGER_FALLING |
+                                                       IRQF_ONESHOT |
+                                                       IRQF_NO_AUTOEN,
+                                                 "tca6416-keypad", input);
                if (error) {
                        dev_dbg(&client->dev,
                                "Unable to claim irq %d; error %d\n",
-                               chip->irqnum, error);
-                       goto fail1;
+                               client->irq, error);
+                       return error;
                }
        }
 
@@ -288,70 +278,19 @@ static int tca6416_keypad_probe(struct i2c_client *client)
        if (error) {
                dev_dbg(&client->dev,
                        "Unable to register input device, error: %d\n", error);
-               goto fail2;
+               return error;
        }
 
        i2c_set_clientdata(client, chip);
-       device_init_wakeup(&client->dev, 1);
 
        return 0;
-
-fail2:
-       if (!chip->use_polling) {
-               free_irq(chip->irqnum, chip);
-               enable_irq(chip->irqnum);
-       }
-fail1:
-       input_free_device(input);
-       kfree(chip);
-       return error;
 }
 
-static void tca6416_keypad_remove(struct i2c_client *client)
-{
-       struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
-       if (!chip->use_polling) {
-               free_irq(chip->irqnum, chip);
-               enable_irq(chip->irqnum);
-       }
-
-       input_unregister_device(chip->input);
-       kfree(chip);
-}
-
-static int tca6416_keypad_suspend(struct device *dev)
-{
-       struct i2c_client *client = to_i2c_client(dev);
-       struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
-       if (device_may_wakeup(dev))
-               enable_irq_wake(chip->irqnum);
-
-       return 0;
-}
-
-static int tca6416_keypad_resume(struct device *dev)
-{
-       struct i2c_client *client = to_i2c_client(dev);
-       struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
-       if (device_may_wakeup(dev))
-               disable_irq_wake(chip->irqnum);
-
-       return 0;
-}
-
-static DEFINE_SIMPLE_DEV_PM_OPS(tca6416_keypad_dev_pm_ops,
-                               tca6416_keypad_suspend, tca6416_keypad_resume);
-
 static struct i2c_driver tca6416_keypad_driver = {
        .driver = {
                .name   = "tca6416-keypad",
-               .pm     = pm_sleep_ptr(&tca6416_keypad_dev_pm_ops),
        },
        .probe          = tca6416_keypad_probe,
-       .remove         = tca6416_keypad_remove,
        .id_table       = tca6416_id,
 };
 
index d5a6c7d..c9a823e 100644 (file)
@@ -640,7 +640,7 @@ static int tegra_kbc_probe(struct platform_device *pdev)
 
        timer_setup(&kbc->timer, tegra_kbc_keypress_timer, 0);
 
-       kbc->mmio = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+       kbc->mmio = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(kbc->mmio))
                return PTR_ERR(kbc->mmio);
 
index 75bd3ea..0fd761a 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pm.h>
 #include <linux/regulator/consumer.h>
 
index 8a320e6..6ba984d 100644 (file)
@@ -791,10 +791,10 @@ config INPUT_IQS626A
          module will be called iqs626a.
 
 config INPUT_IQS7222
-       tristate "Azoteq IQS7222A/B/C capacitive touch controller"
+       tristate "Azoteq IQS7222A/B/C/D capacitive touch controller"
        depends on I2C
        help
-         Say Y to enable support for the Azoteq IQS7222A/B/C family
+         Say Y to enable support for the Azoteq IQS7222A/B/C/D family
          of capacitive touch controllers.
 
          To compile this driver as a module, choose M here: the
index 879790b..85cddb8 100644 (file)
@@ -1,16 +1,8 @@
-/**
+// SPDX-License-Identifier: GPL-2.0-only
+/*
  * CPCAP Power Button Input Driver
  *
  * Copyright (C) 2017 Sebastian Reichel <sre@kernel.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of this
- * archive for more details.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
index b14a389..74808ba 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/workqueue.h>
 #include <linux/regmap.h>
 #include <linux/of.h>
@@ -251,6 +252,14 @@ static int da9063_onkey_probe(struct platform_device *pdev)
                return error;
        }
 
+       error = dev_pm_set_wake_irq(&pdev->dev, irq);
+       if (error)
+               dev_warn(&pdev->dev,
+                        "Failed to set IRQ %d as a wake IRQ: %d\n",
+                        irq, error);
+       else
+               device_init_wakeup(&pdev->dev, true);
+
        error = input_register_device(onkey->input);
        if (error) {
                dev_err(&pdev->dev,
index 134a130..ad44b4d 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/input.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/regulator/consumer.h>
@@ -113,22 +113,14 @@ static int gpio_vibrator_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        vibrator->vcc = devm_regulator_get(&pdev->dev, "vcc");
-       err = PTR_ERR_OR_ZERO(vibrator->vcc);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request regulator: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->vcc))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->vcc),
+                                    "Failed to request regulator\n");
 
        vibrator->gpio = devm_gpiod_get(&pdev->dev, "enable", GPIOD_OUT_LOW);
-       err = PTR_ERR_OR_ZERO(vibrator->gpio);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request main gpio: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->gpio))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->gpio),
+                                    "Failed to request main gpio\n");
 
        INIT_WORK(&vibrator->play_work, gpio_vibrator_play_work);
 
index 1272ef7..c0a0856 100644 (file)
@@ -17,9 +17,9 @@
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index 50035c2..0dab54d 100644 (file)
@@ -19,8 +19,8 @@
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index 096b092..36aeeae 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Azoteq IQS7222A/B/C Capacitive Touch Controller
+ * Azoteq IQS7222A/B/C/D Capacitive Touch Controller
  *
  * Copyright (C) 2022 Jeff LaBundy <jeff@labundy.com>
  */
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
+#include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/ktime.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/property.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
@@ -25,6 +26,7 @@
 #define IQS7222_PROD_NUM_A                     840
 #define IQS7222_PROD_NUM_B                     698
 #define IQS7222_PROD_NUM_C                     863
+#define IQS7222_PROD_NUM_D                     1046
 
 #define IQS7222_SYS_STATUS                     0x10
 #define IQS7222_SYS_STATUS_RESET               BIT(3)
@@ -54,6 +56,7 @@
 
 #define IQS7222_EVENT_MASK_ATI                 BIT(12)
 #define IQS7222_EVENT_MASK_SLDR                        BIT(10)
+#define IQS7222_EVENT_MASK_TPAD                        IQS7222_EVENT_MASK_SLDR
 #define IQS7222_EVENT_MASK_TOUCH               BIT(1)
 #define IQS7222_EVENT_MASK_PROX                        BIT(0)
 
@@ -71,6 +74,7 @@
 #define IQS7222_MAX_COLS_CHAN                  6
 #define IQS7222_MAX_COLS_FILT                  2
 #define IQS7222_MAX_COLS_SLDR                  11
+#define IQS7222_MAX_COLS_TPAD                  24
 #define IQS7222_MAX_COLS_GPIO                  3
 #define IQS7222_MAX_COLS_SYS                   13
 
@@ -102,16 +106,18 @@ enum iqs7222_reg_grp_id {
        IQS7222_REG_GRP_BTN,
        IQS7222_REG_GRP_CHAN,
        IQS7222_REG_GRP_SLDR,
+       IQS7222_REG_GRP_TPAD,
        IQS7222_REG_GRP_GPIO,
        IQS7222_REG_GRP_SYS,
        IQS7222_NUM_REG_GRPS
 };
 
 static const char * const iqs7222_reg_grp_names[IQS7222_NUM_REG_GRPS] = {
-       [IQS7222_REG_GRP_CYCLE] = "cycle",
-       [IQS7222_REG_GRP_CHAN] = "channel",
-       [IQS7222_REG_GRP_SLDR] = "slider",
-       [IQS7222_REG_GRP_GPIO] = "gpio",
+       [IQS7222_REG_GRP_CYCLE] = "cycle-%d",
+       [IQS7222_REG_GRP_CHAN] = "channel-%d",
+       [IQS7222_REG_GRP_SLDR] = "slider-%d",
+       [IQS7222_REG_GRP_TPAD] = "trackpad",
+       [IQS7222_REG_GRP_GPIO] = "gpio-%d",
 };
 
 static const unsigned int iqs7222_max_cols[IQS7222_NUM_REG_GRPS] = {
@@ -122,6 +128,7 @@ static const unsigned int iqs7222_max_cols[IQS7222_NUM_REG_GRPS] = {
        [IQS7222_REG_GRP_CHAN] = IQS7222_MAX_COLS_CHAN,
        [IQS7222_REG_GRP_FILT] = IQS7222_MAX_COLS_FILT,
        [IQS7222_REG_GRP_SLDR] = IQS7222_MAX_COLS_SLDR,
+       [IQS7222_REG_GRP_TPAD] = IQS7222_MAX_COLS_TPAD,
        [IQS7222_REG_GRP_GPIO] = IQS7222_MAX_COLS_GPIO,
        [IQS7222_REG_GRP_SYS] = IQS7222_MAX_COLS_SYS,
 };
@@ -130,8 +137,10 @@ static const unsigned int iqs7222_gpio_links[] = { 2, 5, 6, };
 
 struct iqs7222_event_desc {
        const char *name;
+       u16 link;
        u16 mask;
        u16 val;
+       u16 strict;
        u16 enable;
        enum iqs7222_reg_key_id reg_key;
 };
@@ -188,6 +197,93 @@ static const struct iqs7222_event_desc iqs7222_sl_events[] = {
        },
 };
 
+static const struct iqs7222_event_desc iqs7222_tp_events[] = {
+       {
+               .name = "event-press",
+               .link = BIT(7),
+       },
+       {
+               .name = "event-tap",
+               .link = BIT(0),
+               .mask = BIT(0),
+               .val = BIT(0),
+               .enable = BIT(0),
+               .reg_key = IQS7222_REG_KEY_TAP,
+       },
+       {
+               .name = "event-swipe-x-pos",
+               .link = BIT(2),
+               .mask = BIT(2) | BIT(1),
+               .val = BIT(2),
+               .strict = BIT(4),
+               .enable = BIT(1),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-swipe-y-pos",
+               .link = BIT(3),
+               .mask = BIT(3) | BIT(1),
+               .val = BIT(3),
+               .strict = BIT(3),
+               .enable = BIT(1),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-swipe-x-neg",
+               .link = BIT(4),
+               .mask = BIT(4) | BIT(1),
+               .val = BIT(4),
+               .strict = BIT(4),
+               .enable = BIT(1),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-swipe-y-neg",
+               .link = BIT(5),
+               .mask = BIT(5) | BIT(1),
+               .val = BIT(5),
+               .strict = BIT(3),
+               .enable = BIT(1),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-flick-x-pos",
+               .link = BIT(2),
+               .mask = BIT(2) | BIT(1),
+               .val = BIT(2) | BIT(1),
+               .strict = BIT(4),
+               .enable = BIT(2),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-flick-y-pos",
+               .link = BIT(3),
+               .mask = BIT(3) | BIT(1),
+               .val = BIT(3) | BIT(1),
+               .strict = BIT(3),
+               .enable = BIT(2),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-flick-x-neg",
+               .link = BIT(4),
+               .mask = BIT(4) | BIT(1),
+               .val = BIT(4) | BIT(1),
+               .strict = BIT(4),
+               .enable = BIT(2),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+       {
+               .name = "event-flick-y-neg",
+               .link = BIT(5),
+               .mask = BIT(5) | BIT(1),
+               .val = BIT(5) | BIT(1),
+               .strict = BIT(3),
+               .enable = BIT(2),
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+       },
+};
+
 struct iqs7222_reg_grp_desc {
        u16 base;
        int num_row;
@@ -524,6 +620,62 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
                        },
                },
        },
+       {
+               .prod_num = IQS7222_PROD_NUM_D,
+               .fw_major = 0,
+               .fw_minor = 37,
+               .touch_link = 1770,
+               .allow_offset = 9,
+               .event_offset = 10,
+               .comms_offset = 11,
+               .reg_grps = {
+                       [IQS7222_REG_GRP_STAT] = {
+                               .base = IQS7222_SYS_STATUS,
+                               .num_row = 1,
+                               .num_col = 7,
+                       },
+                       [IQS7222_REG_GRP_CYCLE] = {
+                               .base = 0x8000,
+                               .num_row = 7,
+                               .num_col = 2,
+                       },
+                       [IQS7222_REG_GRP_GLBL] = {
+                               .base = 0x8700,
+                               .num_row = 1,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_BTN] = {
+                               .base = 0x9000,
+                               .num_row = 14,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_CHAN] = {
+                               .base = 0xA000,
+                               .num_row = 14,
+                               .num_col = 4,
+                       },
+                       [IQS7222_REG_GRP_FILT] = {
+                               .base = 0xAE00,
+                               .num_row = 1,
+                               .num_col = 2,
+                       },
+                       [IQS7222_REG_GRP_TPAD] = {
+                               .base = 0xB000,
+                               .num_row = 1,
+                               .num_col = 24,
+                       },
+                       [IQS7222_REG_GRP_GPIO] = {
+                               .base = 0xC000,
+                               .num_row = 3,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_SYS] = {
+                               .base = IQS7222_SYS_SETUP,
+                               .num_row = 1,
+                               .num_col = 12,
+                       },
+               },
+       },
 };
 
 struct iqs7222_prop_desc {
@@ -1009,6 +1161,123 @@ static const struct iqs7222_prop_desc iqs7222_props[] = {
                .label = "maximum gesture time",
        },
        {
+               .name = "azoteq,num-rows",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 0,
+               .reg_shift = 4,
+               .reg_width = 4,
+               .val_min = 1,
+               .val_max = 12,
+               .label = "number of rows",
+       },
+       {
+               .name = "azoteq,num-cols",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 0,
+               .reg_shift = 0,
+               .reg_width = 4,
+               .val_min = 1,
+               .val_max = 12,
+               .label = "number of columns",
+       },
+       {
+               .name = "azoteq,lower-cal-y",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 1,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "lower vertical calibration",
+       },
+       {
+               .name = "azoteq,lower-cal-x",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 1,
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "lower horizontal calibration",
+       },
+       {
+               .name = "azoteq,upper-cal-y",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 2,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "upper vertical calibration",
+       },
+       {
+               .name = "azoteq,upper-cal-x",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 2,
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "upper horizontal calibration",
+       },
+       {
+               .name = "azoteq,top-speed",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 3,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 4,
+               .label = "top speed",
+       },
+       {
+               .name = "azoteq,bottom-speed",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_offset = 3,
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "bottom speed",
+       },
+       {
+               .name = "azoteq,gesture-min-ms",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_TAP,
+               .reg_offset = 20,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 16,
+               .label = "minimum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-max-ms",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+               .reg_offset = 21,
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 16,
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-max-ms",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_TAP,
+               .reg_offset = 21,
+               .reg_shift = 0,
+               .reg_width = 8,
+               .val_pitch = 16,
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_TAP,
+               .reg_offset = 22,
+               .reg_shift = 0,
+               .reg_width = 16,
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_grp = IQS7222_REG_GRP_TPAD,
+               .reg_key = IQS7222_REG_KEY_AXIAL,
+               .reg_offset = 23,
+               .reg_shift = 0,
+               .reg_width = 16,
+               .label = "gesture distance",
+       },
+       {
                .name = "drive-open-drain",
                .reg_grp = IQS7222_REG_GRP_GPIO,
                .reg_offset = 0,
@@ -1091,16 +1360,19 @@ struct iqs7222_private {
        struct gpio_desc *irq_gpio;
        struct i2c_client *client;
        struct input_dev *keypad;
+       struct touchscreen_properties prop;
        unsigned int kp_type[IQS7222_MAX_CHAN][ARRAY_SIZE(iqs7222_kp_events)];
        unsigned int kp_code[IQS7222_MAX_CHAN][ARRAY_SIZE(iqs7222_kp_events)];
        unsigned int sl_code[IQS7222_MAX_SLDR][ARRAY_SIZE(iqs7222_sl_events)];
        unsigned int sl_axis[IQS7222_MAX_SLDR];
+       unsigned int tp_code[ARRAY_SIZE(iqs7222_tp_events)];
        u16 cycle_setup[IQS7222_MAX_CHAN / 2][IQS7222_MAX_COLS_CYCLE];
        u16 glbl_setup[IQS7222_MAX_COLS_GLBL];
        u16 btn_setup[IQS7222_MAX_CHAN][IQS7222_MAX_COLS_BTN];
        u16 chan_setup[IQS7222_MAX_CHAN][IQS7222_MAX_COLS_CHAN];
        u16 filt_setup[IQS7222_MAX_COLS_FILT];
        u16 sldr_setup[IQS7222_MAX_SLDR][IQS7222_MAX_COLS_SLDR];
+       u16 tpad_setup[IQS7222_MAX_COLS_TPAD];
        u16 gpio_setup[ARRAY_SIZE(iqs7222_gpio_links)][IQS7222_MAX_COLS_GPIO];
        u16 sys_setup[IQS7222_MAX_COLS_SYS];
 };
@@ -1127,6 +1399,9 @@ static u16 *iqs7222_setup(struct iqs7222_private *iqs7222,
        case IQS7222_REG_GRP_SLDR:
                return iqs7222->sldr_setup[row];
 
+       case IQS7222_REG_GRP_TPAD:
+               return iqs7222->tpad_setup;
+
        case IQS7222_REG_GRP_GPIO:
                return iqs7222->gpio_setup[row];
 
@@ -1381,9 +1656,6 @@ static int iqs7222_ati_trigger(struct iqs7222_private *iqs7222)
        if (error)
                return error;
 
-       sys_setup &= ~IQS7222_SYS_SETUP_INTF_MODE_MASK;
-       sys_setup &= ~IQS7222_SYS_SETUP_PWR_MODE_MASK;
-
        for (i = 0; i < IQS7222_NUM_RETRIES; i++) {
                /*
                 * Trigger ATI from streaming and normal-power modes so that
@@ -1561,8 +1833,11 @@ static int iqs7222_dev_init(struct iqs7222_private *iqs7222, int dir)
                        return error;
        }
 
-       if (dir == READ)
+       if (dir == READ) {
+               iqs7222->sys_setup[0] &= ~IQS7222_SYS_SETUP_INTF_MODE_MASK;
+               iqs7222->sys_setup[0] &= ~IQS7222_SYS_SETUP_PWR_MODE_MASK;
                return 0;
+       }
 
        return iqs7222_ati_trigger(iqs7222);
 }
@@ -1936,6 +2211,14 @@ static int iqs7222_parse_chan(struct iqs7222_private *iqs7222,
                ref_setup[4] = dev_desc->touch_link;
                if (fwnode_property_present(chan_node, "azoteq,use-prox"))
                        ref_setup[4] -= 2;
+       } else if (dev_desc->reg_grps[IQS7222_REG_GRP_TPAD].num_row &&
+                  fwnode_property_present(chan_node,
+                                          "azoteq,counts-filt-enable")) {
+               /*
+                * In the case of IQS7222D, however, the reference mode field
+                * is partially repurposed as a counts filter enable control.
+                */
+               chan_setup[0] |= IQS7222_CHAN_SETUP_0_REF_MODE_REF;
        }
 
        if (fwnode_property_present(chan_node, "azoteq,rx-enable")) {
@@ -2278,6 +2561,136 @@ static int iqs7222_parse_sldr(struct iqs7222_private *iqs7222,
                                   IQS7222_REG_KEY_NO_WHEEL);
 }
 
+static int iqs7222_parse_tpad(struct iqs7222_private *iqs7222,
+                             struct fwnode_handle *tpad_node, int tpad_index)
+{
+       const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc;
+       struct touchscreen_properties *prop = &iqs7222->prop;
+       struct i2c_client *client = iqs7222->client;
+       int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row;
+       int count, error, i;
+       u16 *event_mask = &iqs7222->sys_setup[dev_desc->event_offset];
+       u16 *tpad_setup = iqs7222->tpad_setup;
+       unsigned int chan_sel[12];
+
+       error = iqs7222_parse_props(iqs7222, tpad_node, tpad_index,
+                                   IQS7222_REG_GRP_TPAD,
+                                   IQS7222_REG_KEY_NONE);
+       if (error)
+               return error;
+
+       count = fwnode_property_count_u32(tpad_node, "azoteq,channel-select");
+       if (count < 0) {
+               dev_err(&client->dev, "Failed to count %s channels: %d\n",
+                       fwnode_get_name(tpad_node), count);
+               return count;
+       } else if (!count || count > ARRAY_SIZE(chan_sel)) {
+               dev_err(&client->dev, "Invalid number of %s channels\n",
+                       fwnode_get_name(tpad_node));
+               return -EINVAL;
+       }
+
+       error = fwnode_property_read_u32_array(tpad_node,
+                                              "azoteq,channel-select",
+                                              chan_sel, count);
+       if (error) {
+               dev_err(&client->dev, "Failed to read %s channels: %d\n",
+                       fwnode_get_name(tpad_node), error);
+               return error;
+       }
+
+       tpad_setup[6] &= ~GENMASK(num_chan - 1, 0);
+
+       for (i = 0; i < ARRAY_SIZE(chan_sel); i++) {
+               tpad_setup[8 + i] = 0;
+               if (i >= count || chan_sel[i] == U8_MAX)
+                       continue;
+
+               if (chan_sel[i] >= num_chan) {
+                       dev_err(&client->dev, "Invalid %s channel: %u\n",
+                               fwnode_get_name(tpad_node), chan_sel[i]);
+                       return -EINVAL;
+               }
+
+               /*
+                * The following fields indicate which channels participate in
+                * the trackpad, as well as each channel's relative placement.
+                */
+               tpad_setup[6] |= BIT(chan_sel[i]);
+               tpad_setup[8 + i] = chan_sel[i] * 34 + 1072;
+       }
+
+       tpad_setup[7] = dev_desc->touch_link;
+       if (fwnode_property_present(tpad_node, "azoteq,use-prox"))
+               tpad_setup[7] -= 2;
+
+       for (i = 0; i < ARRAY_SIZE(iqs7222_tp_events); i++)
+               tpad_setup[20] &= ~(iqs7222_tp_events[i].strict |
+                                   iqs7222_tp_events[i].enable);
+
+       for (i = 0; i < ARRAY_SIZE(iqs7222_tp_events); i++) {
+               const char *event_name = iqs7222_tp_events[i].name;
+               struct fwnode_handle *event_node;
+
+               event_node = fwnode_get_named_child_node(tpad_node, event_name);
+               if (!event_node)
+                       continue;
+
+               if (fwnode_property_present(event_node,
+                                           "azoteq,gesture-angle-tighten"))
+                       tpad_setup[20] |= iqs7222_tp_events[i].strict;
+
+               tpad_setup[20] |= iqs7222_tp_events[i].enable;
+
+               error = iqs7222_parse_event(iqs7222, event_node, tpad_index,
+                                           IQS7222_REG_GRP_TPAD,
+                                           iqs7222_tp_events[i].reg_key,
+                                           iqs7222_tp_events[i].link, 1566,
+                                           NULL,
+                                           &iqs7222->tp_code[i]);
+               fwnode_handle_put(event_node);
+               if (error)
+                       return error;
+
+               if (!dev_desc->event_offset)
+                       continue;
+
+               /*
+                * The press/release event is determined based on whether the
+                * coordinate fields report 0xFFFF and solely relies on touch
+                * or proximity interrupts to be unmasked.
+                */
+               if (i)
+                       *event_mask |= IQS7222_EVENT_MASK_TPAD;
+               else if (tpad_setup[7] == dev_desc->touch_link)
+                       *event_mask |= IQS7222_EVENT_MASK_TOUCH;
+               else
+                       *event_mask |= IQS7222_EVENT_MASK_PROX;
+       }
+
+       if (!iqs7222->tp_code[0])
+               return 0;
+
+       input_set_abs_params(iqs7222->keypad, ABS_X,
+                            0, (tpad_setup[4] ? : 1) - 1, 0, 0);
+
+       input_set_abs_params(iqs7222->keypad, ABS_Y,
+                            0, (tpad_setup[5] ? : 1) - 1, 0, 0);
+
+       touchscreen_parse_properties(iqs7222->keypad, false, prop);
+
+       if (prop->max_x >= U16_MAX || prop->max_y >= U16_MAX) {
+               dev_err(&client->dev, "Invalid trackpad size: %u*%u\n",
+                       prop->max_x, prop->max_y);
+               return -EINVAL;
+       }
+
+       tpad_setup[4] = prop->max_x + 1;
+       tpad_setup[5] = prop->max_y + 1;
+
+       return 0;
+}
+
 static int (*iqs7222_parse_extra[IQS7222_NUM_REG_GRPS])
                                (struct iqs7222_private *iqs7222,
                                 struct fwnode_handle *reg_grp_node,
@@ -2285,6 +2698,7 @@ static int (*iqs7222_parse_extra[IQS7222_NUM_REG_GRPS])
        [IQS7222_REG_GRP_CYCLE] = iqs7222_parse_cycle,
        [IQS7222_REG_GRP_CHAN] = iqs7222_parse_chan,
        [IQS7222_REG_GRP_SLDR] = iqs7222_parse_sldr,
+       [IQS7222_REG_GRP_TPAD] = iqs7222_parse_tpad,
 };
 
 static int iqs7222_parse_reg_grp(struct iqs7222_private *iqs7222,
@@ -2298,7 +2712,7 @@ static int iqs7222_parse_reg_grp(struct iqs7222_private *iqs7222,
        if (iqs7222_reg_grp_names[reg_grp]) {
                char reg_grp_name[16];
 
-               snprintf(reg_grp_name, sizeof(reg_grp_name), "%s-%d",
+               snprintf(reg_grp_name, sizeof(reg_grp_name),
                         iqs7222_reg_grp_names[reg_grp], reg_grp_index);
 
                reg_grp_node = device_get_named_child_node(&client->dev,
@@ -2346,8 +2760,8 @@ static int iqs7222_parse_all(struct iqs7222_private *iqs7222)
                        continue;
 
                /*
-                * The IQS7222C exposes multiple GPIO and must be informed
-                * as to which GPIO this group represents.
+                * The IQS7222C and IQS7222D expose multiple GPIO and must be
+                * informed as to which GPIO this group represents.
                 */
                for (j = 0; j < ARRAY_SIZE(iqs7222_gpio_links); j++)
                        gpio_setup[0] &= ~BIT(iqs7222_gpio_links[j]);
@@ -2480,6 +2894,41 @@ static int iqs7222_report(struct iqs7222_private *iqs7222)
                                         iqs7222->sl_code[i][j], 0);
        }
 
+       for (i = 0; i < dev_desc->reg_grps[IQS7222_REG_GRP_TPAD].num_row; i++) {
+               u16 tpad_pos_x = le16_to_cpu(status[4]);
+               u16 tpad_pos_y = le16_to_cpu(status[5]);
+               u16 state = le16_to_cpu(status[6]);
+
+               input_report_key(iqs7222->keypad, iqs7222->tp_code[0],
+                                tpad_pos_x < U16_MAX);
+
+               if (tpad_pos_x < U16_MAX)
+                       touchscreen_report_pos(iqs7222->keypad, &iqs7222->prop,
+                                              tpad_pos_x, tpad_pos_y, false);
+
+               if (!(le16_to_cpu(status[1]) & IQS7222_EVENT_MASK_TPAD))
+                       continue;
+
+               /*
+                * Skip the press/release event, as it does not have separate
+                * status fields and is handled separately.
+                */
+               for (j = 1; j < ARRAY_SIZE(iqs7222_tp_events); j++) {
+                       u16 mask = iqs7222_tp_events[j].mask;
+                       u16 val = iqs7222_tp_events[j].val;
+
+                       input_report_key(iqs7222->keypad,
+                                        iqs7222->tp_code[j],
+                                        (state & mask) == val);
+               }
+
+               input_sync(iqs7222->keypad);
+
+               for (j = 1; j < ARRAY_SIZE(iqs7222_tp_events); j++)
+                       input_report_key(iqs7222->keypad,
+                                        iqs7222->tp_code[j], 0);
+       }
+
        input_sync(iqs7222->keypad);
 
        return 0;
@@ -2584,6 +3033,7 @@ static const struct of_device_id iqs7222_of_match[] = {
        { .compatible = "azoteq,iqs7222a" },
        { .compatible = "azoteq,iqs7222b" },
        { .compatible = "azoteq,iqs7222c" },
+       { .compatible = "azoteq,iqs7222d" },
        { }
 };
 MODULE_DEVICE_TABLE(of, iqs7222_of_match);
@@ -2598,5 +3048,5 @@ static struct i2c_driver iqs7222_i2c_driver = {
 module_i2c_driver(iqs7222_i2c_driver);
 
 MODULE_AUTHOR("Jeff LaBundy <jeff@labundy.com>");
-MODULE_DESCRIPTION("Azoteq IQS7222A/B/C Capacitive Touch Controller");
+MODULE_DESCRIPTION("Azoteq IQS7222A/B/C/D Capacitive Touch Controller");
 MODULE_LICENSE("GPL");
index 76a190b..662b436 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 
 #define MMA8450_DRV_NAME       "mma8450"
 
index 74d77d8..ba747c5 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/regmap.h>
index 04cb87e..5c288fe 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index 89fb137..c406a1c 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/regmap.h>
 #include <linux/log2.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #define PON_CNTL_1 0x1C
 #define PON_CNTL_PULL_UP BIT(7)
index 3cf1812..1e731d8 100644 (file)
@@ -132,13 +132,8 @@ static int pwm_beeper_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        beeper->pwm = devm_pwm_get(dev, NULL);
-       if (IS_ERR(beeper->pwm)) {
-               error = PTR_ERR(beeper->pwm);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to request PWM device: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(beeper->pwm))
+               return dev_err_probe(dev, PTR_ERR(beeper->pwm), "Failed to request PWM device\n");
 
        /* Sync up PWM state and ensure it is off. */
        pwm_init_state(beeper->pwm, &state);
@@ -151,13 +146,9 @@ static int pwm_beeper_probe(struct platform_device *pdev)
        }
 
        beeper->amplifier = devm_regulator_get(dev, "amp");
-       if (IS_ERR(beeper->amplifier)) {
-               error = PTR_ERR(beeper->amplifier);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get 'amp' regulator: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(beeper->amplifier))
+               return dev_err_probe(dev, PTR_ERR(beeper->amplifier),
+                                    "Failed to get 'amp' regulator\n");
 
        INIT_WORK(&beeper->work, pwm_beeper_work);
 
index 2ba0352..acac79c 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/input.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/pwm.h>
@@ -140,32 +140,20 @@ static int pwm_vibrator_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        vibrator->vcc = devm_regulator_get(&pdev->dev, "vcc");
-       err = PTR_ERR_OR_ZERO(vibrator->vcc);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request regulator: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->vcc))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->vcc),
+                                    "Failed to request regulator\n");
 
        vibrator->enable_gpio = devm_gpiod_get_optional(&pdev->dev, "enable",
                                                        GPIOD_OUT_LOW);
-       err = PTR_ERR_OR_ZERO(vibrator->enable_gpio);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request enable gpio: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->enable_gpio))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->enable_gpio),
+                                    "Failed to request enable gpio\n");
 
        vibrator->pwm = devm_pwm_get(&pdev->dev, "enable");
-       err = PTR_ERR_OR_ZERO(vibrator->pwm);
-       if (err) {
-               if (err != -EPROBE_DEFER)
-                       dev_err(&pdev->dev, "Failed to request main pwm: %d\n",
-                               err);
-               return err;
-       }
+       if (IS_ERR(vibrator->pwm))
+               return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->pwm),
+                                    "Failed to request main pwm\n");
 
        INIT_WORK(&vibrator->play_work, pwm_vibrator_play_work);
 
index 22ec620..e94cab8 100644 (file)
@@ -236,12 +236,8 @@ static int rotary_encoder_probe(struct platform_device *pdev)
                device_property_read_bool(dev, "rotary-encoder,relative-axis");
 
        encoder->gpios = devm_gpiod_get_array(dev, NULL, GPIOD_IN);
-       if (IS_ERR(encoder->gpios)) {
-               err = PTR_ERR(encoder->gpios);
-               if (err != -EPROBE_DEFER)
-                       dev_err(dev, "unable to get gpios: %d\n", err);
-               return err;
-       }
+       if (IS_ERR(encoder->gpios))
+               return dev_err_probe(dev, PTR_ERR(encoder->gpios), "unable to get gpios\n");
        if (encoder->gpios->ndescs < 2) {
                dev_err(dev, "not enough gpios found\n");
                return -EINVAL;
@@ -255,7 +251,6 @@ static int rotary_encoder_probe(struct platform_device *pdev)
 
        input->name = pdev->name;
        input->id.bustype = BUS_HOST;
-       input->dev.parent = dev;
 
        if (encoder->relative_axis)
                input_set_capability(input, EV_REL, encoder->axis);
index cdcb773..e5dd847 100644 (file)
@@ -9,7 +9,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/input.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #include <asm/io.h>
index 0cff742..148a601 100644 (file)
@@ -1221,13 +1221,8 @@ static int elan_probe(struct i2c_client *client)
        mutex_init(&data->sysfs_mutex);
 
        data->vcc = devm_regulator_get(dev, "vcc");
-       if (IS_ERR(data->vcc)) {
-               error = PTR_ERR(data->vcc);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get 'vcc' regulator: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(data->vcc))
+               return dev_err_probe(dev, PTR_ERR(data->vcc), "Failed to get 'vcc' regulator\n");
 
        error = regulator_enable(data->vcc);
        if (error) {
index 2a2459b..7b13de9 100644 (file)
@@ -5,6 +5,7 @@
 
 #define pr_fmt(fmt)            KBUILD_MODNAME ": " fmt
 
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/libps2.h>
@@ -118,13 +119,18 @@ static psmouse_ret_t psmouse_smbus_process_byte(struct psmouse *psmouse)
        return PSMOUSE_FULL_PACKET;
 }
 
-static int psmouse_smbus_reconnect(struct psmouse *psmouse)
+static void psmouse_activate_smbus_mode(struct psmouse_smbus_dev *smbdev)
 {
-       struct psmouse_smbus_dev *smbdev = psmouse->private;
-
-       if (smbdev->need_deactivate)
-               psmouse_deactivate(psmouse);
+       if (smbdev->need_deactivate) {
+               psmouse_deactivate(smbdev->psmouse);
+               /* Give the device time to switch into SMBus mode */
+               msleep(30);
+       }
+}
 
+static int psmouse_smbus_reconnect(struct psmouse *psmouse)
+{
+       psmouse_activate_smbus_mode(psmouse->private);
        return 0;
 }
 
@@ -257,8 +263,7 @@ int psmouse_smbus_init(struct psmouse *psmouse,
                }
        }
 
-       if (need_deactivate)
-               psmouse_deactivate(psmouse);
+       psmouse_activate_smbus_mode(smbdev);
 
        psmouse->private = smbdev;
        psmouse->protocol_handler = psmouse_smbus_process_byte;
index 513d96e..3f6866d 100644 (file)
  * Contributors: Daniel Hellstrom <daniel@gaisler.com>
  */
 #include <linux/platform_device.h>
-#include <linux/of_device.h>
 #include <linux/module.h>
 #include <linux/serio.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/device.h>
 #include <linux/delay.h>
index 028e45b..1724d6c 100644 (file)
@@ -1281,6 +1281,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = {
                .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
                                        SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
        },
+       /* See comment on TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU above */
+       {
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "PD5x_7xPNP_PNR_PNN_PNT"),
+               },
+               .driver_data = (void *)(SERIO_QUIRK_NOAUX)
+       },
        {
                .matches = {
                        DMI_MATCH(DMI_BOARD_NAME, "X170SM"),
index c712c1f..b68793b 100644 (file)
@@ -2,7 +2,9 @@
 #ifndef _I8042_SPARCIO_H
 #define _I8042_SPARCIO_H
 
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 
 #include <asm/io.h>
index ce420eb..e8a9709 100644 (file)
@@ -101,12 +101,12 @@ static int rpckbd_probe(struct platform_device *dev)
        int tx_irq, rx_irq;
 
        rx_irq = platform_get_irq(dev, 0);
-       if (rx_irq <= 0)
-               return rx_irq < 0 ? rx_irq : -ENXIO;
+       if (rx_irq < 0)
+               return rx_irq;
 
        tx_irq = platform_get_irq(dev, 1);
-       if (tx_irq <= 0)
-               return tx_irq < 0 ? tx_irq : -ENXIO;
+       if (tx_irq < 0)
+               return tx_irq;
 
        serio = kzalloc(sizeof(struct serio), GFP_KERNEL);
        rpckbd = kzalloc(sizeof(*rpckbd), GFP_KERNEL);
index 960d760..f3d28da 100644 (file)
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #define DRIVER_NAME            "xilinx_ps2"
 
index c2cbd33..e3e2324 100644 (file)
@@ -655,10 +655,10 @@ config TOUCHSCREEN_MTOUCH
          module will be called mtouch.
 
 config TOUCHSCREEN_NOVATEK_NVT_TS
-       tristate "Novatek NVT-ts touchscreen support"
+       tristate "Novatek NT11205 touchscreen support"
        depends on I2C
        help
-         Say Y here if you have a Novatek NVT-ts touchscreen.
+         Say Y here if you have a Novatek NT11205 touchscreen.
          If unsure, say N.
 
          To compile this driver as a module, choose M here: the
@@ -1365,6 +1365,16 @@ config TOUCHSCREEN_IQS5XX
          To compile this driver as a module, choose M here: the
          module will be called iqs5xx.
 
+config TOUCHSCREEN_IQS7211
+       tristate "Azoteq IQS7210A/7211A/E trackpad/touchscreen controller"
+       depends on I2C
+       help
+         Say Y to enable support for the Azoteq IQS7210A/7211A/E
+         family of trackpad/touchscreen controllers.
+
+         To compile this driver as a module, choose M here: the
+         module will be called iqs7211.
+
 config TOUCHSCREEN_ZINITIX
        tristate "Zinitix touchscreen support"
        depends on I2C
index 159cd51..62bd24f 100644 (file)
@@ -115,5 +115,6 @@ obj-$(CONFIG_TOUCHSCREEN_COLIBRI_VF50)      += colibri-vf50-ts.o
 obj-$(CONFIG_TOUCHSCREEN_ROHM_BU21023) += rohm_bu21023.o
 obj-$(CONFIG_TOUCHSCREEN_RASPBERRYPI_FW)       += raspberrypi-ts.o
 obj-$(CONFIG_TOUCHSCREEN_IQS5XX)       += iqs5xx.o
+obj-$(CONFIG_TOUCHSCREEN_IQS7211)      += iqs7211.o
 obj-$(CONFIG_TOUCHSCREEN_ZINITIX)      += zinitix.o
 obj-$(CONFIG_TOUCHSCREEN_HIMAX_HX83112B)       += himax_hx83112b.o
index 85332cf..652439a 100644 (file)
@@ -410,31 +410,32 @@ static int bu21013_probe(struct i2c_client *client)
        struct input_dev *in_dev;
        struct input_absinfo *info;
        u32 max_x = 0, max_y = 0;
+       struct device *dev = &client->dev;
        int error;
 
        if (!i2c_check_functionality(client->adapter,
                                     I2C_FUNC_SMBUS_BYTE_DATA)) {
-               dev_err(&client->dev, "i2c smbus byte data not supported\n");
+               dev_err(dev, "i2c smbus byte data not supported\n");
                return -EIO;
        }
 
        if (!client->irq) {
-               dev_err(&client->dev, "No IRQ set up\n");
+               dev_err(dev, "No IRQ set up\n");
                return -EINVAL;
        }
 
-       ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL);
+       ts = devm_kzalloc(dev, sizeof(*ts), GFP_KERNEL);
        if (!ts)
                return -ENOMEM;
 
        ts->client = client;
 
-       ts->x_flip = device_property_read_bool(&client->dev, "rohm,flip-x");
-       ts->y_flip = device_property_read_bool(&client->dev, "rohm,flip-y");
+       ts->x_flip = device_property_read_bool(dev, "rohm,flip-x");
+       ts->y_flip = device_property_read_bool(dev, "rohm,flip-y");
 
-       in_dev = devm_input_allocate_device(&client->dev);
+       in_dev = devm_input_allocate_device(dev);
        if (!in_dev) {
-               dev_err(&client->dev, "device memory alloc failed\n");
+               dev_err(dev, "device memory alloc failed\n");
                return -ENOMEM;
        }
        ts->in_dev = in_dev;
@@ -444,8 +445,8 @@ static int bu21013_probe(struct i2c_client *client)
        in_dev->name = DRIVER_TP;
        in_dev->id.bustype = BUS_I2C;
 
-       device_property_read_u32(&client->dev, "rohm,touch-max-x", &max_x);
-       device_property_read_u32(&client->dev, "rohm,touch-max-y", &max_y);
+       device_property_read_u32(dev, "rohm,touch-max-x", &max_x);
+       device_property_read_u32(dev, "rohm,touch-max-y", &max_y);
 
        input_set_abs_params(in_dev, ABS_MT_POSITION_X, 0, max_x, 0, 0);
        input_set_abs_params(in_dev, ABS_MT_POSITION_Y, 0, max_y, 0, 0);
@@ -454,14 +455,14 @@ static int bu21013_probe(struct i2c_client *client)
 
        /* Adjust for the legacy "flip" properties, if present */
        if (!ts->props.invert_x &&
-           device_property_read_bool(&client->dev, "rohm,flip-x")) {
+           device_property_read_bool(dev, "rohm,flip-x")) {
                info = &in_dev->absinfo[ABS_MT_POSITION_X];
                info->maximum -= info->minimum;
                info->minimum = 0;
        }
 
        if (!ts->props.invert_y &&
-           device_property_read_bool(&client->dev, "rohm,flip-y")) {
+           device_property_read_bool(dev, "rohm,flip-y")) {
                info = &in_dev->absinfo[ABS_MT_POSITION_Y];
                info->maximum -= info->minimum;
                info->minimum = 0;
@@ -471,55 +472,46 @@ static int bu21013_probe(struct i2c_client *client)
                                    INPUT_MT_DIRECT | INPUT_MT_TRACK |
                                        INPUT_MT_DROP_UNUSED);
        if (error) {
-               dev_err(&client->dev, "failed to initialize MT slots");
+               dev_err(dev, "failed to initialize MT slots");
                return error;
        }
 
-       ts->regulator = devm_regulator_get(&client->dev, "avdd");
+       ts->regulator = devm_regulator_get(dev, "avdd");
        if (IS_ERR(ts->regulator)) {
-               dev_err(&client->dev, "regulator_get failed\n");
+               dev_err(dev, "regulator_get failed\n");
                return PTR_ERR(ts->regulator);
        }
 
        error = regulator_enable(ts->regulator);
        if (error) {
-               dev_err(&client->dev, "regulator enable failed\n");
+               dev_err(dev, "regulator enable failed\n");
                return error;
        }
 
-       error = devm_add_action_or_reset(&client->dev, bu21013_power_off, ts);
+       error = devm_add_action_or_reset(dev, bu21013_power_off, ts);
        if (error) {
-               dev_err(&client->dev, "failed to install power off handler\n");
+               dev_err(dev, "failed to install power off handler\n");
                return error;
        }
 
        /* Named "CS" on the chip, DT binding is "reset" */
-       ts->cs_gpiod = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH);
-       error = PTR_ERR_OR_ZERO(ts->cs_gpiod);
-       if (error) {
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev, "failed to get CS GPIO\n");
-               return error;
-       }
+       ts->cs_gpiod = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(ts->cs_gpiod))
+               return dev_err_probe(dev, PTR_ERR(ts->cs_gpiod), "failed to get CS GPIO\n");
+
        gpiod_set_consumer_name(ts->cs_gpiod, "BU21013 CS");
 
-       error = devm_add_action_or_reset(&client->dev,
-                                        bu21013_disable_chip, ts);
+       error = devm_add_action_or_reset(dev, bu21013_disable_chip, ts);
        if (error) {
-               dev_err(&client->dev,
-                       "failed to install chip disable handler\n");
+               dev_err(dev, "failed to install chip disable handler\n");
                return error;
        }
 
        /* Named "INT" on the chip, DT binding is "touch" */
-       ts->int_gpiod = devm_gpiod_get_optional(&client->dev,
-                                               "touch", GPIOD_IN);
+       ts->int_gpiod = devm_gpiod_get_optional(dev, "touch", GPIOD_IN);
        error = PTR_ERR_OR_ZERO(ts->int_gpiod);
-       if (error) {
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev, "failed to get INT GPIO\n");
-               return error;
-       }
+       if (error)
+               return dev_err_probe(dev, error, "failed to get INT GPIO\n");
 
        if (ts->int_gpiod)
                gpiod_set_consumer_name(ts->int_gpiod, "BU21013 INT");
@@ -527,22 +519,20 @@ static int bu21013_probe(struct i2c_client *client)
        /* configure the touch panel controller */
        error = bu21013_init_chip(ts);
        if (error) {
-               dev_err(&client->dev, "error in bu21013 config\n");
+               dev_err(dev, "error in bu21013 config\n");
                return error;
        }
 
-       error = devm_request_threaded_irq(&client->dev, client->irq,
-                                         NULL, bu21013_gpio_irq,
+       error = devm_request_threaded_irq(dev, client->irq, NULL, bu21013_gpio_irq,
                                          IRQF_ONESHOT, DRIVER_TP, ts);
        if (error) {
-               dev_err(&client->dev, "request irq %d failed\n",
-                       client->irq);
+               dev_err(dev, "request irq %d failed\n", client->irq);
                return error;
        }
 
        error = input_register_device(in_dev);
        if (error) {
-               dev_err(&client->dev, "failed to register input device\n");
+               dev_err(dev, "failed to register input device\n");
                return error;
        }
 
index c8126d2..e1dfbd9 100644 (file)
@@ -333,6 +333,7 @@ static void bu21029_stop_chip(struct input_dev *dev)
 
 static int bu21029_probe(struct i2c_client *client)
 {
+       struct device *dev = &client->dev;
        struct bu21029_ts_data *bu21029;
        struct input_dev *in_dev;
        int error;
@@ -341,45 +342,33 @@ static int bu21029_probe(struct i2c_client *client)
                                     I2C_FUNC_SMBUS_WRITE_BYTE |
                                     I2C_FUNC_SMBUS_WRITE_BYTE_DATA |
                                     I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
-               dev_err(&client->dev,
-                       "i2c functionality support is not sufficient\n");
+               dev_err(dev, "i2c functionality support is not sufficient\n");
                return -EIO;
        }
 
-       bu21029 = devm_kzalloc(&client->dev, sizeof(*bu21029), GFP_KERNEL);
+       bu21029 = devm_kzalloc(dev, sizeof(*bu21029), GFP_KERNEL);
        if (!bu21029)
                return -ENOMEM;
 
-       error = device_property_read_u32(&client->dev, "rohm,x-plate-ohms",
-                                        &bu21029->x_plate_ohms);
+       error = device_property_read_u32(dev, "rohm,x-plate-ohms", &bu21029->x_plate_ohms);
        if (error) {
-               dev_err(&client->dev,
-                       "invalid 'x-plate-ohms' supplied: %d\n", error);
+               dev_err(dev, "invalid 'x-plate-ohms' supplied: %d\n", error);
                return error;
        }
 
-       bu21029->vdd = devm_regulator_get(&client->dev, "vdd");
-       if (IS_ERR(bu21029->vdd)) {
-               error = PTR_ERR(bu21029->vdd);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "failed to acquire 'vdd' supply: %d\n", error);
-               return error;
-       }
+       bu21029->vdd = devm_regulator_get(dev, "vdd");
+       if (IS_ERR(bu21029->vdd))
+               return dev_err_probe(dev, PTR_ERR(bu21029->vdd),
+                                    "failed to acquire 'vdd' supply\n");
 
-       bu21029->reset_gpios = devm_gpiod_get_optional(&client->dev,
-                                                      "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(bu21029->reset_gpios)) {
-               error = PTR_ERR(bu21029->reset_gpios);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "failed to acquire 'reset' gpio: %d\n", error);
-               return error;
-       }
+       bu21029->reset_gpios = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(bu21029->reset_gpios))
+               return dev_err_probe(dev, PTR_ERR(bu21029->reset_gpios),
+                                    "failed to acquire 'reset' gpio\n");
 
-       in_dev = devm_input_allocate_device(&client->dev);
+       in_dev = devm_input_allocate_device(dev);
        if (!in_dev) {
-               dev_err(&client->dev, "unable to allocate input device\n");
+               dev_err(dev, "unable to allocate input device\n");
                return -ENOMEM;
        }
 
@@ -400,20 +389,18 @@ static int bu21029_probe(struct i2c_client *client)
 
        input_set_drvdata(in_dev, bu21029);
 
-       error = devm_request_threaded_irq(&client->dev, client->irq,
-                                         NULL, bu21029_touch_soft_irq,
+       error = devm_request_threaded_irq(dev, client->irq, NULL,
+                                         bu21029_touch_soft_irq,
                                          IRQF_ONESHOT | IRQF_NO_AUTOEN,
                                          DRIVER_NAME, bu21029);
        if (error) {
-               dev_err(&client->dev,
-                       "unable to request touch irq: %d\n", error);
+               dev_err(dev, "unable to request touch irq: %d\n", error);
                return error;
        }
 
        error = input_register_device(in_dev);
        if (error) {
-               dev_err(&client->dev,
-                       "unable to register input device: %d\n", error);
+               dev_err(dev, "unable to register input device: %d\n", error);
                return error;
        }
 
index 9fbeaf1..d6876d1 100644 (file)
@@ -191,12 +191,8 @@ static int icn8318_probe(struct i2c_client *client)
                return -ENOMEM;
 
        data->wake_gpio = devm_gpiod_get(dev, "wake", GPIOD_OUT_LOW);
-       if (IS_ERR(data->wake_gpio)) {
-               error = PTR_ERR(data->wake_gpio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Error getting wake gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(data->wake_gpio))
+               return dev_err_probe(dev, PTR_ERR(data->wake_gpio), "Error getting wake gpio\n");
 
        input = devm_input_allocate_device(dev);
        if (!input)
index 967ecde..ea38951 100644 (file)
@@ -258,12 +258,8 @@ static int cy8ctma140_probe(struct i2c_client *client)
        ts->regulators[1].supply = "vdd";
        error = devm_regulator_bulk_get(dev, ARRAY_SIZE(ts->regulators),
                                      ts->regulators);
-       if (error) {
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get regulators %d\n",
-                               error);
-               return error;
-       }
+       if (error)
+               return dev_err_probe(dev, error, "Failed to get regulators\n");
 
        error = cy8ctma140_power_up(ts);
        if (error)
index b461ded..db5a885 100644 (file)
@@ -18,8 +18,8 @@
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <asm/unaligned.h>
 
index 795c7da..457d533 100644 (file)
@@ -1168,13 +1168,9 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client)
        tsdata->max_support_points = chip_data->max_support_points;
 
        tsdata->vcc = devm_regulator_get(&client->dev, "vcc");
-       if (IS_ERR(tsdata->vcc)) {
-               error = PTR_ERR(tsdata->vcc);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "failed to request regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(tsdata->vcc))
+               return dev_err_probe(&client->dev, PTR_ERR(tsdata->vcc),
+                                    "failed to request regulator\n");
 
        tsdata->iovcc = devm_regulator_get(&client->dev, "iovcc");
        if (IS_ERR(tsdata->iovcc)) {
index fd8724a..cc3103b 100644 (file)
@@ -264,12 +264,8 @@ static int ektf2127_probe(struct i2c_client *client)
 
        /* This requests the gpio *and* turns on the touchscreen controller */
        ts->power_gpios = devm_gpiod_get(dev, "power", GPIOD_OUT_HIGH);
-       if (IS_ERR(ts->power_gpios)) {
-               error = PTR_ERR(ts->power_gpios);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Error getting power gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->power_gpios))
+               return dev_err_probe(dev, PTR_ERR(ts->power_gpios), "Error getting power gpio\n");
 
        input = devm_input_allocate_device(dev);
        if (!input)
index 2da1db6..a1af3de 100644 (file)
@@ -1438,24 +1438,14 @@ static int elants_i2c_probe(struct i2c_client *client)
        i2c_set_clientdata(client, ts);
 
        ts->vcc33 = devm_regulator_get(&client->dev, "vcc33");
-       if (IS_ERR(ts->vcc33)) {
-               error = PTR_ERR(ts->vcc33);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get 'vcc33' regulator: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(ts->vcc33))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->vcc33),
+                                    "Failed to get 'vcc33' regulator\n");
 
        ts->vccio = devm_regulator_get(&client->dev, "vccio");
-       if (IS_ERR(ts->vccio)) {
-               error = PTR_ERR(ts->vccio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get 'vccio' regulator: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(ts->vccio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->vccio),
+                                    "Failed to get 'vccio' regulator\n");
 
        ts->reset_gpio = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH);
        if (IS_ERR(ts->reset_gpio)) {
index 4af4c1e..4c0d99a 100644 (file)
@@ -7,6 +7,7 @@
  * minimal implementation based on egalax_ts.c and egalax_i2c.c
  */
 
+#include <linux/acpi.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -18,6 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/regulator/consumer.h>
 #include <linux/sizes.h>
 #include <linux/timer.h>
 #include <asm/unaligned.h>
@@ -360,6 +362,12 @@ static int exc3000_probe(struct i2c_client *client)
        if (IS_ERR(data->reset))
                return PTR_ERR(data->reset);
 
+       /* For proper reset sequence, enable power while reset asserted */
+       error = devm_regulator_get_enable(&client->dev, "vdd");
+       if (error && error != -ENODEV)
+               return dev_err_probe(&client->dev, error,
+                                    "failed to request vdd regulator\n");
+
        if (data->reset) {
                msleep(EXC3000_RESET_MS);
                gpiod_set_value_cansleep(data->reset, 0);
@@ -454,10 +462,19 @@ static const struct of_device_id exc3000_of_match[] = {
 MODULE_DEVICE_TABLE(of, exc3000_of_match);
 #endif
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id exc3000_acpi_match[] = { /* ACPI IDs matched by exc3000_driver via .acpi_match_table */
+       { "EGA00001", .driver_data = (kernel_ulong_t)&exc3000_info[EETI_EXC80H60] }, /* driver_data holds a pointer to the EETI_EXC80H60 entry of exc3000_info[] */
+       { } /* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(acpi, exc3000_acpi_match);
+#endif
+
 static struct i2c_driver exc3000_driver = {
        .driver = {
                .name   = "exc3000",
                .of_match_table = of_match_ptr(exc3000_of_match),
+               .acpi_match_table = ACPI_PTR(exc3000_acpi_match),
        },
        .id_table       = exc3000_id,
        .probe          = exc3000_probe,
index f5aa240..da9954d 100644 (file)
@@ -935,7 +935,6 @@ static int goodix_add_acpi_gpio_mappings(struct goodix_ts_data *ts)
  */
 static int goodix_get_gpio_config(struct goodix_ts_data *ts)
 {
-       int error;
        struct device *dev;
        struct gpio_desc *gpiod;
        bool added_acpi_mappings = false;
@@ -951,33 +950,20 @@ static int goodix_get_gpio_config(struct goodix_ts_data *ts)
        ts->gpiod_rst_flags = GPIOD_IN;
 
        ts->avdd28 = devm_regulator_get(dev, "AVDD28");
-       if (IS_ERR(ts->avdd28)) {
-               error = PTR_ERR(ts->avdd28);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev,
-                               "Failed to get AVDD28 regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->avdd28))
+               return dev_err_probe(dev, PTR_ERR(ts->avdd28), "Failed to get AVDD28 regulator\n");
 
        ts->vddio = devm_regulator_get(dev, "VDDIO");
-       if (IS_ERR(ts->vddio)) {
-               error = PTR_ERR(ts->vddio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev,
-                               "Failed to get VDDIO regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->vddio))
+               return dev_err_probe(dev, PTR_ERR(ts->vddio), "Failed to get VDDIO regulator\n");
 
 retry_get_irq_gpio:
        /* Get the interrupt GPIO pin number */
        gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_INT_NAME, GPIOD_IN);
-       if (IS_ERR(gpiod)) {
-               error = PTR_ERR(gpiod);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get %s GPIO: %d\n",
-                               GOODIX_GPIO_INT_NAME, error);
-               return error;
-       }
+       if (IS_ERR(gpiod))
+               return dev_err_probe(dev, PTR_ERR(gpiod), "Failed to get %s GPIO\n",
+                                    GOODIX_GPIO_INT_NAME);
+
        if (!gpiod && has_acpi_companion(dev) && !added_acpi_mappings) {
                added_acpi_mappings = true;
                if (goodix_add_acpi_gpio_mappings(ts) == 0)
@@ -988,13 +974,9 @@ retry_get_irq_gpio:
 
        /* Get the reset line GPIO pin number */
        gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, ts->gpiod_rst_flags);
-       if (IS_ERR(gpiod)) {
-               error = PTR_ERR(gpiod);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get %s GPIO: %d\n",
-                               GOODIX_GPIO_RST_NAME, error);
-               return error;
-       }
+       if (IS_ERR(gpiod))
+               return dev_err_probe(dev, PTR_ERR(gpiod), "Failed to get %s GPIO\n",
+                                    GOODIX_GPIO_RST_NAME);
 
        ts->gpiod_rst = gpiod;
 
@@ -1517,6 +1499,7 @@ MODULE_DEVICE_TABLE(i2c, goodix_ts_id);
 static const struct acpi_device_id goodix_acpi_match[] = {
        { "GDIX1001", 0 },
        { "GDIX1002", 0 },
+       { "GDX9110", 0 }, /* NOTE(review): "GDX" prefix, unlike the "GDIX" IDs above -- confirm against the firmware's _HID */
        { }
 };
 MODULE_DEVICE_TABLE(acpi, goodix_acpi_match);
index f7cd773..ad6828e 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/input/mt.h>
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
index 0aa9d64..b4768b6 100644 (file)
@@ -23,8 +23,8 @@
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
diff --git a/drivers/input/touchscreen/iqs7211.c b/drivers/input/touchscreen/iqs7211.c
new file mode 100644 (file)
index 0000000..dc084f8
--- /dev/null
@@ -0,0 +1,2557 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller
+ *
+ * Copyright (C) 2023 Jeff LaBundy <jeff@labundy.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+#define IQS7211_PROD_NUM                       0x00 /* NOTE(review): presumably the register address of the product number -- confirm */
+
+#define IQS7211_EVENT_MASK_ALL                 GENMASK(14, 8) /* bits 8..14: every flag below plus bit 14 */
+#define IQS7211_EVENT_MASK_ALP                 BIT(13)
+#define IQS7211_EVENT_MASK_BTN                 BIT(12)
+#define IQS7211_EVENT_MASK_ATI                 BIT(11)
+#define IQS7211_EVENT_MASK_MOVE                        BIT(10)
+#define IQS7211_EVENT_MASK_GSTR                        BIT(9)
+#define IQS7211_EVENT_MODE                     BIT(8)
+
+#define IQS7211_COMMS_ERROR                    0xEEEE
+#define IQS7211_COMMS_RETRY_MS                 50
+#define IQS7211_COMMS_SLEEP_US                 100
+#define IQS7211_COMMS_TIMEOUT_US               (100 * USEC_PER_MSEC) /* 100 ms expressed in microseconds */
+#define IQS7211_RESET_TIMEOUT_MS               150
+#define IQS7211_START_TIMEOUT_US               (1 * USEC_PER_SEC) /* 1 s expressed in microseconds */
+
+#define IQS7211_NUM_RETRIES                    5
+#define IQS7211_NUM_CRX                                8
+#define IQS7211_MAX_CTX                                13
+
+#define IQS7211_MAX_CONTACTS                   2
+#define IQS7211_MAX_CYCLES                     21
+
+/*
+ * The following delay is used during instances that must wait for the open-
+ * drain RDY pin to settle. Its value is calculated as 5*R*C, where R and C
+ * represent typical datasheet values of 4.7k and 100 nF, respectively.
+ */
+#define iqs7211_irq_wait()                     usleep_range(2500, 2600) /* 5 * 4.7k * 100 nF = 2.35 ms; the sleep range sits just above that */
+
+enum iqs7211_dev_id { /* supported device variants; used as designated-initializer indices into iqs7211_devs[] */
+       IQS7210A,
+       IQS7211A,
+       IQS7211E,
+};
+
+enum iqs7211_comms_mode { /* communication modes; NOTE(review): their exact semantics are defined by users outside this excerpt */
+       IQS7211_COMMS_MODE_WAIT,
+       IQS7211_COMMS_MODE_FREE,
+       IQS7211_COMMS_MODE_FORCE,
+};
+
+struct iqs7211_reg_field_desc { /* list node pairing an 8-bit register address with a 16-bit mask and value */
+       struct list_head list; /* linkage into a list of field descriptors */
+       u8 addr;
+       u16 mask; /* NOTE(review): presumably the bits of register 'addr' that 'val' applies to -- confirm */
+       u16 val;
+};
+
+enum iqs7211_reg_key_id { /* key categories stored in iqs7211_event_desc.reg_key */
+       IQS7211_REG_KEY_NONE, /* value 0, so it is what an omitted .reg_key initializer yields */
+       IQS7211_REG_KEY_PROX,
+       IQS7211_REG_KEY_TOUCH,
+       IQS7211_REG_KEY_TAP,
+       IQS7211_REG_KEY_HOLD,
+       IQS7211_REG_KEY_PALM,
+       IQS7211_REG_KEY_AXIAL_X,
+       IQS7211_REG_KEY_AXIAL_Y,
+       IQS7211_REG_KEY_RESERVED
+};
+
+enum iqs7211_reg_grp_id { /* register groups; used to index the per-group arrays in this file */
+       IQS7211_REG_GRP_TP,
+       IQS7211_REG_GRP_BTN,
+       IQS7211_REG_GRP_ALP,
+       IQS7211_REG_GRP_SYS,
+       IQS7211_NUM_REG_GRPS /* number of groups above; array dimension for per-group tables */
+};
+
+static const char * const iqs7211_reg_grp_names[IQS7211_NUM_REG_GRPS] = { /* name string per register group; IQS7211_REG_GRP_SYS has no entry and stays NULL */
+       [IQS7211_REG_GRP_TP] = "trackpad",
+       [IQS7211_REG_GRP_BTN] = "button",
+       [IQS7211_REG_GRP_ALP] = "alp",
+};
+
+static const u16 iqs7211_reg_grp_masks[IQS7211_NUM_REG_GRPS] = { /* event-mask bit per register group; IQS7211_REG_GRP_SYS has no entry and stays 0 */
+       [IQS7211_REG_GRP_TP] = IQS7211_EVENT_MASK_GSTR, /* the trackpad group maps to the gesture event bit */
+       [IQS7211_REG_GRP_BTN] = IQS7211_EVENT_MASK_BTN,
+       [IQS7211_REG_GRP_ALP] = IQS7211_EVENT_MASK_ALP,
+};
+
+struct iqs7211_event_desc { /* one row of a per-device *_kp_events table */
+       const char *name; /* "event-..." string; left NULL by the unnamed ALP rows */
+       u16 mask; /* single-bit value in every table row visible here */
+       u16 enable; /* NOTE(review): presumably the enable-register bits for this event -- confirm against users */
+       enum iqs7211_reg_grp_id reg_grp;
+       enum iqs7211_reg_key_id reg_key;
+};
+
+static const struct iqs7211_event_desc iqs7210a_kp_events[] = { /* IQS7210A event table, referenced by iqs7211_devs[IQS7210A].kp_events */
+       { /* unnamed ALP row: .name stays NULL and .reg_key stays IQS7211_REG_KEY_NONE */
+               .mask = BIT(10),
+               .enable = BIT(13) | BIT(12),
+               .reg_grp = IQS7211_REG_GRP_ALP,
+       },
+       {
+               .name = "event-prox",
+               .mask = BIT(2),
+               .enable = BIT(5) | BIT(4),
+               .reg_grp = IQS7211_REG_GRP_BTN,
+               .reg_key = IQS7211_REG_KEY_PROX,
+       },
+       {
+               .name = "event-touch",
+               .mask = BIT(3),
+               .enable = BIT(5) | BIT(4),
+               .reg_grp = IQS7211_REG_GRP_BTN,
+               .reg_key = IQS7211_REG_KEY_TOUCH,
+       },
+       {
+               .name = "event-tap",
+               .mask = BIT(0),
+               .enable = BIT(0),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-hold",
+               .mask = BIT(1),
+               .enable = BIT(1),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-x-neg",
+               .mask = BIT(2),
+               .enable = BIT(2),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-x-pos",
+               .mask = BIT(3),
+               .enable = BIT(3),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-y-pos",
+               .mask = BIT(4),
+               .enable = BIT(4),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+       {
+               .name = "event-swipe-y-neg",
+               .mask = BIT(5),
+               .enable = BIT(5),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+};
+
+static const struct iqs7211_event_desc iqs7211a_kp_events[] = { /* IQS7211A event table, referenced by iqs7211_devs[IQS7211A].kp_events */
+       { /* unnamed ALP row: .name, .enable and .reg_key are omitted and therefore zero */
+               .mask = BIT(14),
+               .reg_grp = IQS7211_REG_GRP_ALP,
+       },
+       {
+               .name = "event-tap",
+               .mask = BIT(0),
+               .enable = BIT(0),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-hold",
+               .mask = BIT(1),
+               .enable = BIT(1),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-x-neg",
+               .mask = BIT(2),
+               .enable = BIT(2),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-x-pos",
+               .mask = BIT(3),
+               .enable = BIT(3),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-y-pos",
+               .mask = BIT(4),
+               .enable = BIT(4),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+       {
+               .name = "event-swipe-y-neg",
+               .mask = BIT(5),
+               .enable = BIT(5),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+};
+
+static const struct iqs7211_event_desc iqs7211e_kp_events[] = { /* IQS7211E event table; unlike the IQS7211A table it lists x-pos before x-neg and adds multi-tap, palm and swipe-hold rows */
+       { /* unnamed ALP row: .name, .enable and .reg_key are omitted and therefore zero */
+               .mask = BIT(14),
+               .reg_grp = IQS7211_REG_GRP_ALP,
+       },
+       {
+               .name = "event-tap",
+               .mask = BIT(0),
+               .enable = BIT(0),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-tap-double",
+               .mask = BIT(1),
+               .enable = BIT(1),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-tap-triple",
+               .mask = BIT(2),
+               .enable = BIT(2),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_TAP,
+       },
+       {
+               .name = "event-hold",
+               .mask = BIT(3),
+               .enable = BIT(3),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-palm",
+               .mask = BIT(4),
+               .enable = BIT(4),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_PALM,
+       },
+       {
+               .name = "event-swipe-x-pos",
+               .mask = BIT(8),
+               .enable = BIT(8),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-x-neg",
+               .mask = BIT(9),
+               .enable = BIT(9),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+       },
+       {
+               .name = "event-swipe-y-pos",
+               .mask = BIT(10),
+               .enable = BIT(10),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+       {
+               .name = "event-swipe-y-neg",
+               .mask = BIT(11),
+               .enable = BIT(11),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+       },
+       {
+               .name = "event-swipe-x-pos-hold",
+               .mask = BIT(12),
+               .enable = BIT(12),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-x-neg-hold",
+               .mask = BIT(13),
+               .enable = BIT(13),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-y-pos-hold",
+               .mask = BIT(14), /* same bit number as the ALP row's mask, but this row is in the TP group */
+               .enable = BIT(14),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+       {
+               .name = "event-swipe-y-neg-hold",
+               .mask = BIT(15),
+               .enable = BIT(15),
+               .reg_grp = IQS7211_REG_GRP_TP,
+               .reg_key = IQS7211_REG_KEY_HOLD,
+       },
+};
+
+struct iqs7211_dev_desc { /* per-variant description; one instance per enum iqs7211_dev_id in iqs7211_devs[] */
+       const char *tp_name; /* e.g. "iqs7210a_trackpad" */
+       const char *kp_name; /* e.g. "iqs7210a_keys" */
+       u16 prod_num;
+       u16 show_reset;
+       u16 ati_error[IQS7211_NUM_REG_GRPS]; /* indexed by enum iqs7211_reg_grp_id */
+       u16 ati_start[IQS7211_NUM_REG_GRPS]; /* indexed by enum iqs7211_reg_grp_id */
+       u16 suspend;
+       u16 ack_reset;
+       u16 comms_end;
+       u16 comms_req;
+       int charge_shift;
+       int info_offs;
+       int gesture_offs;
+       int contact_offs;
+       u8 sys_stat; /* NOTE(review): this and the u8 members below look like register addresses -- confirm against users */
+       u8 sys_ctrl;
+       u8 alp_config;
+       u8 tp_config;
+       u8 exp_file;
+       u8 kp_enable[IQS7211_NUM_REG_GRPS]; /* indexed by enum iqs7211_reg_grp_id */
+       u8 gesture_angle;
+       u8 rx_tx_map;
+       u8 cycle_alloc[2];
+       u8 cycle_limit[2];
+       const struct iqs7211_event_desc *kp_events; /* points at one of the *_kp_events tables */
+       int num_kp_events; /* ARRAY_SIZE() of the table kp_events points at */
+       int min_crx_alp;
+       int num_ctx;
+};
+
+static const struct iqs7211_dev_desc iqs7211_devs[] = { /* indexed by device ID */
+       [IQS7210A] = { /* only entry that populates the BTN group and min_crx_alp */
+               .tp_name = "iqs7210a_trackpad",
+               .kp_name = "iqs7210a_keys",
+               .prod_num = 944,
+               .show_reset = BIT(15),
+               .ati_error = {
+                       [IQS7211_REG_GRP_TP] = BIT(12),
+                       [IQS7211_REG_GRP_BTN] = BIT(0),
+                       [IQS7211_REG_GRP_ALP] = BIT(8),
+               },
+               .ati_start = {
+                       [IQS7211_REG_GRP_TP] = BIT(13),
+                       [IQS7211_REG_GRP_BTN] = BIT(1),
+                       [IQS7211_REG_GRP_ALP] = BIT(9),
+               },
+               .suspend = BIT(11),
+               .ack_reset = BIT(7),
+               .comms_end = BIT(2),
+               .comms_req = BIT(1),
+               .charge_shift = 4,
+               .info_offs = 0,
+               .gesture_offs = 1,
+               .contact_offs = 4,
+               .sys_stat = 0x0A,
+               .sys_ctrl = 0x35,
+               .alp_config = 0x39,
+               .tp_config = 0x4E,
+               .exp_file = 0x57,
+               .kp_enable = {
+                       [IQS7211_REG_GRP_TP] = 0x58,
+                       [IQS7211_REG_GRP_BTN] = 0x37, /* same value as ALP */
+                       [IQS7211_REG_GRP_ALP] = 0x37,
+               },
+               .gesture_angle = 0x5F,
+               .rx_tx_map = 0x60,
+               .cycle_alloc = { 0x66, 0x75, },
+               .cycle_limit = { 10, 6, },
+               .kp_events = iqs7210a_kp_events,
+               .num_kp_events = ARRAY_SIZE(iqs7210a_kp_events),
+               .min_crx_alp = 4,
+               .num_ctx = IQS7211_MAX_CTX - 1,
+       },
+       [IQS7211A] = { /* .suspend, .comms_end, .min_crx_alp and BTN slots stay zero */
+               .tp_name = "iqs7211a_trackpad",
+               .kp_name = "iqs7211a_keys",
+               .prod_num = 763,
+               .show_reset = BIT(7),
+               .ati_error = {
+                       [IQS7211_REG_GRP_TP] = BIT(3),
+                       [IQS7211_REG_GRP_ALP] = BIT(5),
+               },
+               .ati_start = {
+                       [IQS7211_REG_GRP_TP] = BIT(5),
+                       [IQS7211_REG_GRP_ALP] = BIT(6),
+               },
+               .ack_reset = BIT(7),
+               .comms_req = BIT(4),
+               .charge_shift = 0,
+               .info_offs = 0,
+               .gesture_offs = 1,
+               .contact_offs = 4,
+               .sys_stat = 0x10,
+               .sys_ctrl = 0x50,
+               .tp_config = 0x60,
+               .alp_config = 0x72,
+               .exp_file = 0x74,
+               .kp_enable = {
+                       [IQS7211_REG_GRP_TP] = 0x80,
+               },
+               .gesture_angle = 0x87,
+               .rx_tx_map = 0x90,
+               .cycle_alloc = { 0xA0, 0xB0, },
+               .cycle_limit = { 10, 8, },
+               .kp_events = iqs7211a_kp_events,
+               .num_kp_events = ARRAY_SIZE(iqs7211a_kp_events),
+               .num_ctx = IQS7211_MAX_CTX - 1,
+       },
+       [IQS7211E] = { /* .min_crx_alp and BTN slots stay zero */
+               .tp_name = "iqs7211e_trackpad",
+               .kp_name = "iqs7211e_keys",
+               .prod_num = 1112,
+               .show_reset = BIT(7),
+               .ati_error = {
+                       [IQS7211_REG_GRP_TP] = BIT(3),
+                       [IQS7211_REG_GRP_ALP] = BIT(5),
+               },
+               .ati_start = {
+                       [IQS7211_REG_GRP_TP] = BIT(5),
+                       [IQS7211_REG_GRP_ALP] = BIT(6),
+               },
+               .suspend = BIT(11),
+               .ack_reset = BIT(7),
+               .comms_end = BIT(6),
+               .comms_req = BIT(4),
+               .charge_shift = 0,
+               .info_offs = 1, /* info/gesture order is swapped relative to the other two */
+               .gesture_offs = 0,
+               .contact_offs = 2,
+               .sys_stat = 0x0E,
+               .sys_ctrl = 0x33,
+               .tp_config = 0x41,
+               .alp_config = 0x36,
+               .exp_file = 0x4A,
+               .kp_enable = {
+                       [IQS7211_REG_GRP_TP] = 0x4B,
+               },
+               .gesture_angle = 0x55,
+               .rx_tx_map = 0x56,
+               .cycle_alloc = { 0x5D, 0x6C, },
+               .cycle_limit = { 10, 11, },
+               .kp_events = iqs7211e_kp_events,
+               .num_kp_events = ARRAY_SIZE(iqs7211e_kp_events),
+               .num_ctx = IQS7211_MAX_CTX, /* the only entry using the full IQS7211_MAX_CTX */
+       },
+};
+
+struct iqs7211_prop_desc { /* one configurable property; instances live in iqs7211_props[] */
+       const char *name; /* property name, e.g. "azoteq,ati-target" */
+       enum iqs7211_reg_key_id reg_key; /* left at its zero value by most entries */
+       u8 reg_addr[IQS7211_NUM_REG_GRPS][ARRAY_SIZE(iqs7211_devs)]; /* [register group][device] */
+       int reg_shift; /* presumably the field's bit offset in the register - confirm */
+       int reg_width; /* presumably the field's bit width; zero handling not visible here */
+       int val_pitch; /* NOTE(review): looks like a scaling step for the value - confirm */
+       int val_min;
+       int val_max;
+       const char *label; /* human-readable description string */
+};
+
+static const struct iqs7211_prop_desc iqs7211_props[] = { /* property -> register field map */
+       { /* the three "ati-frac-*" entries below share one register per group/device */
+               .name = "azoteq,ati-frac-div-fine",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x1E,
+                               [IQS7211A] = 0x30,
+                               [IQS7211E] = 0x21,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x22,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x23,
+                               [IQS7211A] = 0x36,
+                               [IQS7211E] = 0x25,
+                       },
+               },
+               .reg_shift = 9,
+               .reg_width = 5,
+               .label = "ATI fine fractional divider",
+       },
+       {
+               .name = "azoteq,ati-frac-mult-coarse",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x1E,
+                               [IQS7211A] = 0x30,
+                               [IQS7211E] = 0x21,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x22,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x23,
+                               [IQS7211A] = 0x36,
+                               [IQS7211E] = 0x25,
+                       },
+               },
+               .reg_shift = 5,
+               .reg_width = 4,
+               .label = "ATI coarse fractional multiplier",
+       },
+       {
+               .name = "azoteq,ati-frac-div-coarse",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x1E,
+                               [IQS7211A] = 0x30,
+                               [IQS7211E] = 0x21,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x22,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x23,
+                               [IQS7211A] = 0x36,
+                               [IQS7211E] = 0x25,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 5,
+               .label = "ATI coarse fractional divider",
+       },
+       { /* "ati-comp-div" is split over three entries with different field layouts */
+               .name = "azoteq,ati-comp-div",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x1F,
+                               [IQS7211E] = 0x22,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x24,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7211E] = 0x26,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .val_max = 31,
+               .label = "ATI compensation divider",
+       },
+       { /* IQS7210A ALP: same address 0x24 as the BTN slot above, shifted by 8 */
+               .name = "azoteq,ati-comp-div",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x24,
+                       },
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_max = 31,
+               .label = "ATI compensation divider",
+       },
+       { /* IQS7211A: reg_shift and reg_width are left at zero */
+               .name = "azoteq,ati-comp-div",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7211A] = 0x31,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7211A] = 0x37,
+                       },
+               },
+               .val_max = 31,
+               .label = "ATI compensation divider",
+       },
+       {
+               .name = "azoteq,ati-target",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x20,
+                               [IQS7211A] = 0x32,
+                               [IQS7211E] = 0x23,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x27,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x28,
+                               [IQS7211A] = 0x38,
+                               [IQS7211E] = 0x27,
+                       },
+               },
+               .label = "ATI target",
+       },
+       { /* IQS7210A only: ALP and BTN "ati-base" share address 0x26 at different shifts */
+               .name = "azoteq,ati-base",
+               .reg_addr[IQS7211_REG_GRP_ALP] = {
+                       [IQS7210A] = 0x26,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 8,
+               .label = "ATI base",
+       },
+       {
+               .name = "azoteq,ati-base",
+               .reg_addr[IQS7211_REG_GRP_BTN] = {
+                       [IQS7210A] = 0x26,
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .val_pitch = 8,
+               .label = "ATI base",
+       },
+       {
+               .name = "azoteq,rate-active-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x29,
+                       [IQS7211A] = 0x40,
+                       [IQS7211E] = 0x28,
+               },
+               .label = "active mode report rate",
+       },
+       {
+               .name = "azoteq,rate-touch-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2A,
+                       [IQS7211A] = 0x41,
+                       [IQS7211E] = 0x29,
+               },
+               .label = "idle-touch mode report rate",
+       },
+       {
+               .name = "azoteq,rate-idle-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2B,
+                       [IQS7211A] = 0x42,
+                       [IQS7211E] = 0x2A,
+               },
+               .label = "idle mode report rate",
+       },
+       {
+               .name = "azoteq,rate-lp1-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2C,
+                       [IQS7211A] = 0x43,
+                       [IQS7211E] = 0x2B,
+               },
+               .label = "low-power mode 1 report rate",
+       },
+       {
+               .name = "azoteq,rate-lp2-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2D,
+                       [IQS7211A] = 0x44,
+                       [IQS7211E] = 0x2C,
+               },
+               .label = "low-power mode 2 report rate",
+       },
+       {
+               .name = "azoteq,timeout-active-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2E,
+                       [IQS7211A] = 0x45,
+                       [IQS7211E] = 0x2D,
+               },
+               .val_pitch = 1000,
+               .label = "active mode timeout",
+       },
+       {
+               .name = "azoteq,timeout-touch-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x2F,
+                       [IQS7211A] = 0x46,
+                       [IQS7211E] = 0x2E,
+               },
+               .val_pitch = 1000,
+               .label = "idle-touch mode timeout",
+       },
+       {
+               .name = "azoteq,timeout-idle-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x30,
+                       [IQS7211A] = 0x47,
+                       [IQS7211E] = 0x2F,
+               },
+               .val_pitch = 1000,
+               .label = "idle mode timeout",
+       },
+       {
+               .name = "azoteq,timeout-lp1-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x31,
+                       [IQS7211A] = 0x48,
+                       [IQS7211E] = 0x30,
+               },
+               .val_pitch = 1000,
+               .label = "low-power mode 1 timeout",
+       },
+       { /* IQS7210A/IQS7211E: shares its address with "timeout-ati-ms" below (shift 8 vs 0) */
+               .name = "azoteq,timeout-lp2-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x32,
+                       [IQS7211E] = 0x31,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "trackpad reference value update rate",
+       },
+       { /* IQS7211A: separate address, reg_shift/reg_width left at zero */
+               .name = "azoteq,timeout-lp2-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7211A] = 0x49,
+               },
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "trackpad reference value update rate",
+       },
+       {
+               .name = "azoteq,timeout-ati-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x32,
+                       [IQS7211E] = 0x31,
+               },
+               .reg_width = 8,
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "ATI error timeout",
+       },
+       {
+               .name = "azoteq,timeout-ati-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7211A] = 0x35,
+               },
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "ATI error timeout",
+       },
+       {
+               .name = "azoteq,timeout-comms-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x33,
+                       [IQS7211A] = 0x4A,
+                       [IQS7211E] = 0x32,
+               },
+               .label = "communication timeout",
+       },
+       {
+               .name = "azoteq,timeout-press-ms",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x34,
+               },
+               .reg_width = 8,
+               .val_pitch = 1000,
+               .val_max = 60000,
+               .label = "press timeout",
+       },
+       { /* the next four entries all target address 0x37 on IQS7210A */
+               .name = "azoteq,ati-mode",
+               .reg_addr[IQS7211_REG_GRP_ALP] = {
+                       [IQS7210A] = 0x37,
+               },
+               .reg_shift = 15,
+               .reg_width = 1,
+               .label = "ATI mode",
+       },
+       {
+               .name = "azoteq,ati-mode",
+               .reg_addr[IQS7211_REG_GRP_BTN] = {
+                       [IQS7210A] = 0x37,
+               },
+               .reg_shift = 7,
+               .reg_width = 1,
+               .label = "ATI mode",
+       },
+       {
+               .name = "azoteq,sense-mode",
+               .reg_addr[IQS7211_REG_GRP_ALP] = {
+                       [IQS7210A] = 0x37,
+                       [IQS7211A] = 0x72,
+                       [IQS7211E] = 0x36,
+               },
+               .reg_shift = 8,
+               .reg_width = 1,
+               .label = "sensing mode",
+       },
+       {
+               .name = "azoteq,sense-mode",
+               .reg_addr[IQS7211_REG_GRP_BTN] = {
+                       [IQS7210A] = 0x37,
+               },
+               .reg_shift = 0,
+               .reg_width = 2,
+               .val_max = 2,
+               .label = "sensing mode",
+       },
+       {
+               .name = "azoteq,fosc-freq",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x38,
+                       [IQS7211A] = 0x52,
+                       [IQS7211E] = 0x35,
+               },
+               .reg_shift = 4,
+               .reg_width = 1,
+               .label = "core clock frequency selection",
+       },
+       {
+               .name = "azoteq,fosc-trim",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x38,
+                       [IQS7211A] = 0x52,
+                       [IQS7211E] = 0x35,
+               },
+               .reg_shift = 0,
+               .reg_width = 4,
+               .label = "core clock frequency trim",
+       },
+       {
+               .name = "azoteq,touch-exit",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x3B,
+                               [IQS7211A] = 0x53,
+                               [IQS7211E] = 0x38,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3E,
+                       },
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "touch exit factor",
+       },
+       {
+               .name = "azoteq,touch-enter",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x3B,
+                               [IQS7211A] = 0x53,
+                               [IQS7211E] = 0x38,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3E,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "touch entrance factor",
+       },
+       { /* BTN/ALP "thresh"; the TP and palm variants are separate entries further down */
+               .name = "azoteq,thresh",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3C,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x3D,
+                               [IQS7211A] = 0x54,
+                               [IQS7211E] = 0x39,
+                       },
+               },
+               .label = "threshold",
+       },
+       {
+               .name = "azoteq,debounce-exit",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3F,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x40,
+                               [IQS7211A] = 0x56,
+                               [IQS7211E] = 0x3A,
+                       },
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "debounce exit factor",
+       },
+       {
+               .name = "azoteq,debounce-enter",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x3F,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x40,
+                               [IQS7211A] = 0x56,
+                               [IQS7211E] = 0x3A,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "debounce entrance factor",
+       },
+       {
+               .name = "azoteq,conv-frac",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x48,
+                               [IQS7211A] = 0x58,
+                               [IQS7211E] = 0x3D,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x49,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x4A,
+                               [IQS7211A] = 0x59,
+                               [IQS7211E] = 0x3E,
+                       },
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "conversion frequency fractional divider",
+       },
+       {
+               .name = "azoteq,conv-period",
+               .reg_addr = {
+                       [IQS7211_REG_GRP_TP] = {
+                               [IQS7210A] = 0x48,
+                               [IQS7211A] = 0x58,
+                               [IQS7211E] = 0x3D,
+                       },
+                       [IQS7211_REG_GRP_BTN] = {
+                               [IQS7210A] = 0x49,
+                       },
+                       [IQS7211_REG_GRP_ALP] = {
+                               [IQS7210A] = 0x4A,
+                               [IQS7211A] = 0x59,
+                               [IQS7211E] = 0x3E,
+                       },
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "conversion period",
+       },
+       { /* TP "thresh" shares its addresses with "contact-split" below (shift 0 vs 8) */
+               .name = "azoteq,thresh",
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x55,
+                       [IQS7211A] = 0x67,
+                       [IQS7211E] = 0x48,
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "threshold",
+       },
+       {
+               .name = "azoteq,contact-split",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x55,
+                       [IQS7211A] = 0x67,
+                       [IQS7211E] = 0x48,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "contact split factor",
+       },
+       { /* IQS7210A/IQS7211E keep trim-x and trim-y at one address; IQS7211A uses two */
+               .name = "azoteq,trim-x",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x56,
+                       [IQS7211E] = 0x49,
+               },
+               .reg_shift = 0,
+               .reg_width = 8,
+               .label = "horizontal trim width",
+       },
+       {
+               .name = "azoteq,trim-x",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7211A] = 0x68,
+               },
+               .label = "horizontal trim width",
+       },
+       {
+               .name = "azoteq,trim-y",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7210A] = 0x56,
+                       [IQS7211E] = 0x49,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .label = "vertical trim height",
+       },
+       {
+               .name = "azoteq,trim-y",
+               .reg_addr[IQS7211_REG_GRP_SYS] = {
+                       [IQS7211A] = 0x69,
+               },
+               .label = "vertical trim height",
+       },
+       { /* from here on every entry sets .reg_key; names repeat across different keys */
+               .name = "azoteq,gesture-max-ms",
+               .reg_key = IQS7211_REG_KEY_TAP,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x59,
+                       [IQS7211A] = 0x81,
+                       [IQS7211E] = 0x4C,
+               },
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-mid-ms",
+               .reg_key = IQS7211_REG_KEY_TAP,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7211E] = 0x4D,
+               },
+               .label = "repeated gesture time",
+       },
+       { /* the TAP and HOLD "gesture-dist" entries point at the same addresses */
+               .name = "azoteq,gesture-dist",
+               .reg_key = IQS7211_REG_KEY_TAP,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5A,
+                       [IQS7211A] = 0x82,
+                       [IQS7211E] = 0x4E,
+               },
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_key = IQS7211_REG_KEY_HOLD,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5A,
+                       [IQS7211A] = 0x82,
+                       [IQS7211E] = 0x4E,
+               },
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-min-ms",
+               .reg_key = IQS7211_REG_KEY_HOLD,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5B,
+                       [IQS7211A] = 0x83,
+                       [IQS7211E] = 0x4F,
+               },
+               .label = "minimum gesture time",
+       },
+       { /* the AXIAL_X and AXIAL_Y "gesture-max-ms" entries point at the same addresses */
+               .name = "azoteq,gesture-max-ms",
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5C,
+                       [IQS7211A] = 0x84,
+                       [IQS7211E] = 0x50,
+               },
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-max-ms",
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5C,
+                       [IQS7211A] = 0x84,
+                       [IQS7211E] = 0x50,
+               },
+               .label = "maximum gesture time",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5D,
+                       [IQS7211A] = 0x85,
+                       [IQS7211E] = 0x51,
+               },
+               .label = "gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist",
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7210A] = 0x5E,
+                       [IQS7211A] = 0x86,
+                       [IQS7211E] = 0x52,
+               },
+               .label = "gesture distance",
+       },
+       { /* "gesture-dist-rep" has addresses for IQS7211E only */
+               .name = "azoteq,gesture-dist-rep",
+               .reg_key = IQS7211_REG_KEY_AXIAL_X,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7211E] = 0x53,
+               },
+               .label = "repeated gesture distance",
+       },
+       {
+               .name = "azoteq,gesture-dist-rep",
+               .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7211E] = 0x54,
+               },
+               .label = "repeated gesture distance",
+       },
+       { /* palm "thresh": IQS7211E only, capped at 42 */
+               .name = "azoteq,thresh",
+               .reg_key = IQS7211_REG_KEY_PALM,
+               .reg_addr[IQS7211_REG_GRP_TP] = {
+                       [IQS7211E] = 0x55,
+               },
+               .reg_shift = 8,
+               .reg_width = 8,
+               .val_max = 42,
+               .label = "threshold",
+       },
+};
+
+static const u8 iqs7211_gesture_angle[] = { /* 76 entries, indices 0..75 */
+       0x00, 0x01, 0x02, 0x03, /* values equal round(64 * tan(index in degrees)) */
+       0x04, 0x06, 0x07, 0x08,
+       0x09, 0x0A, 0x0B, 0x0C,
+       0x0E, 0x0F, 0x10, 0x11,
+       0x12, 0x14, 0x15, 0x16,
+       0x17, 0x19, 0x1A, 0x1B,
+       0x1C, 0x1E, 0x1F, 0x21,
+       0x22, 0x23, 0x25, 0x26,
+       0x28, 0x2A, 0x2B, 0x2D,
+       0x2E, 0x30, 0x32, 0x34,
+       0x36, 0x38, 0x3A, 0x3C,
+       0x3E, 0x40, 0x42, 0x45, /* index 45 -> 0x40 (64), i.e. tan(45 deg) = 1 */
+       0x47, 0x4A, 0x4C, 0x4F,
+       0x52, 0x55, 0x58, 0x5B,
+       0x5F, 0x63, 0x66, 0x6B,
+       0x6F, 0x73, 0x78, 0x7E,
+       0x83, 0x89, 0x90, 0x97,
+       0x9E, 0xA7, 0xB0, 0xBA,
+       0xC5, 0xD1, 0xDF, 0xEF, /* index 75 -> 0xEF; presumably the largest supported angle */
+};
+
+struct iqs7211_ver_info { /* packed, little-endian, 10 bytes in total */
+       __le16 prod_num; /* presumably compared with iqs7211_dev_desc.prod_num - confirm */
+       __le16 major;
+       __le16 minor;
+       __le32 patch;
+} __packed;
+
+struct iqs7211_touch_data { /* packed, little-endian, 8 bytes; presumably one contact - confirm */
+       __le16 abs_x;
+       __le16 abs_y;
+       __le16 pressure;
+       __le16 area;
+} __packed;
+
+struct iqs7211_tp_config { /* packed, 8 bytes: four single bytes, then two little-endian words */
+       u8 tp_settings;
+       u8 total_rx;
+       u8 total_tx;
+       u8 num_contacts;
+       __le16 max_x;
+       __le16 max_y;
+} __packed;
+
+struct iqs7211_private { /* per-device driver state passed to every helper in this file */
+       const struct iqs7211_dev_desc *dev_desc; /* presumably an iqs7211_devs[] entry - confirm */
+       struct gpio_desc *reset_gpio; /* may be NULL; may be the same descriptor as irq_gpio */
+       struct gpio_desc *irq_gpio; /* polled by iqs7211_irq_poll() */
+       struct i2c_client *client; /* I2C handle used for all transfers */
+       struct input_dev *tp_idev;
+       struct input_dev *kp_idev;
+       struct iqs7211_ver_info ver_info;
+       struct iqs7211_tp_config tp_config;
+       struct touchscreen_properties prop;
+       struct list_head reg_field_head;
+       enum iqs7211_comms_mode comms_init;
+       enum iqs7211_comms_mode comms_mode; /* selects the path taken by iqs7211_force_comms() */
+       unsigned int num_contacts;
+       unsigned int kp_code[ARRAY_SIZE(iqs7211e_kp_events)]; /* sized by the IQS7211E event table */
+       u8 rx_tx_map[IQS7211_MAX_CTX + 1];
+       u8 cycle_alloc[2][33];
+       u8 exp_file[2];
+       u16 event_mask;
+       u16 ati_start;
+       u16 gesture_cache;
+};
+
+/*
+ * Sample the interrupt GPIO every IQS7211_COMMS_SLEEP_US until it reads as
+ * nonzero or timeout_us has elapsed. A negative sample (the GPIO read itself
+ * failed) is handed back unchanged; in every other case the caller receives
+ * whatever readx_poll_timeout() reported.
+ */
+static int iqs7211_irq_poll(struct iqs7211_private *iqs7211, u64 timeout_us)
+{
+       int level, ret;
+
+       ret = readx_poll_timeout(gpiod_get_value_cansleep, iqs7211->irq_gpio,
+                                level, level != 0, IQS7211_COMMS_SLEEP_US,
+                                timeout_us);
+       if (level < 0)
+               return level;
+
+       return ret;
+}
+
+/*
+ * Pulse the reset GPIO and then poll the interrupt GPIO for up to
+ * IQS7211_START_TIMEOUT_US. Nothing is done, and 0 is returned, when no
+ * reset GPIO is present.
+ */
+static int iqs7211_hard_reset(struct iqs7211_private *iqs7211)
+{
+       if (!iqs7211->reset_gpio)
+               return 0;
+
+       gpiod_set_value_cansleep(iqs7211->reset_gpio, 1);
+
+       if (iqs7211->reset_gpio == iqs7211->irq_gpio) {
+               /*
+                * Reset and interrupt share one RDY/MCLR pin. Holding reset
+                * for the longer interval ensures the pin is sampled between
+                * the device's periodic assertions; this relies on the
+                * default communication timeout not having been overwritten
+                * in OTP memory.
+                */
+               msleep(IQS7211_RESET_TIMEOUT_MS);
+               gpiod_set_value_cansleep(iqs7211->reset_gpio, 0);
+               iqs7211_irq_wait();
+       } else {
+               /* A dedicated reset pin only needs a short pulse. */
+               usleep_range(1000, 1100);
+               gpiod_set_value_cansleep(iqs7211->reset_gpio, 0);
+       }
+
+       return iqs7211_irq_poll(iqs7211, IQS7211_START_TIMEOUT_US);
+}
+
+/*
+ * Open a communication window according to iqs7211->comms_mode:
+ *
+ *   IQS7211_COMMS_MODE_WAIT  - poll the interrupt GPIO and return the result
+ *   IQS7211_COMMS_MODE_FREE  - nothing to do, return 0
+ *   IQS7211_COMMS_MODE_FORCE - send the force communication command (below)
+ *   anything else            - return -EINVAL
+ */
+static int iqs7211_force_comms(struct iqs7211_private *iqs7211)
+{
+       u8 force_cmd[] = { 0xFF, };
+       int rdy, sent;
+
+       if (iqs7211->comms_mode == IQS7211_COMMS_MODE_WAIT)
+               return iqs7211_irq_poll(iqs7211, IQS7211_START_TIMEOUT_US);
+
+       if (iqs7211->comms_mode == IQS7211_COMMS_MODE_FREE)
+               return 0;
+
+       if (iqs7211->comms_mode != IQS7211_COMMS_MODE_FORCE)
+               return -EINVAL;
+
+       /*
+        * Until the device asserts its interrupt (RDY) pin it cannot
+        * communicate: transfers attempted while RDY is deasserted are still
+        * ACKed, but written data is ignored and read data comes back as
+        * 0xEE.
+        *
+        * Unsolicited communication therefore starts with a special force
+        * communication command, after which the device eventually asserts
+        * RDY and agrees to communicate.
+        *
+        * Whether the window was forced or opened by an interrupt, the device
+        * deasserts RDY on its own once it sees an I2C stop condition or a
+        * timeout expires.
+        */
+       rdy = gpiod_get_value_cansleep(iqs7211->irq_gpio);
+       if (rdy)
+               return rdy < 0 ? rdy : 0;
+
+       sent = i2c_master_send(iqs7211->client, force_cmd, sizeof(force_cmd));
+       if (sent >= (int)sizeof(force_cmd)) {
+               iqs7211_irq_wait();
+
+               return iqs7211_irq_poll(iqs7211, IQS7211_COMMS_TIMEOUT_US);
+       }
+
+       /* Back off before reporting the failure; a short write becomes -EIO. */
+       msleep(IQS7211_COMMS_RETRY_MS);
+
+       return sent < 0 ? sent : -EIO;
+}
+
+/*
+ * Read val_len bytes starting at register reg into val.
+ *
+ * Every attempt calls iqs7211_force_comms() and then issues a two-message
+ * transfer: a write of the register address followed by the read. At most
+ * IQS7211_NUM_RETRIES attempts are made. Returns 0 on success; otherwise the
+ * negative error code of the last attempt, which is also logged.
+ */
+static int iqs7211_read_burst(struct iqs7211_private *iqs7211,
+                             u8 reg, void *val, u16 val_len)
+{
+       struct i2c_client *client = iqs7211->client;
+       struct i2c_msg xfer[] = {
+               {
+                       .addr = client->addr,
+                       .flags = 0,
+                       .len = sizeof(reg),
+                       .buf = &reg,
+               },
+               {
+                       .addr = client->addr,
+                       .flags = I2C_M_RD,
+                       .len = val_len,
+                       .buf = (u8 *)val,
+               },
+       };
+       int attempt, ret;
+
+       /*
+        * The RDY pin may be deasserted automatically just as the read is
+        * initiated. The retries below cover that edge case by repeating the
+        * read using forced communication.
+        */
+       for (attempt = 0; attempt < IQS7211_NUM_RETRIES; attempt++) {
+               ret = iqs7211_force_comms(iqs7211);
+               if (ret < 0)
+                       continue;
+
+               ret = i2c_transfer(client->adapter, xfer, ARRAY_SIZE(xfer));
+               if (ret < (int)ARRAY_SIZE(xfer)) {
+                       /* Fewer messages than requested counts as -EIO. */
+                       if (ret >= 0)
+                               ret = -EIO;
+
+                       msleep(IQS7211_COMMS_RETRY_MS);
+                       continue;
+               }
+
+               if (get_unaligned_le16(xfer[1].buf) != IQS7211_COMMS_ERROR) {
+                       ret = 0;
+                       break;
+               }
+
+               /* The first word carries the error marker: try again. */
+               ret = -ENODATA;
+       }
+
+       iqs7211_irq_wait();
+
+       if (ret < 0)
+               dev_err(&client->dev,
+                       "Failed to read from address 0x%02X: %d\n", reg, ret);
+
+       return ret;
+}
+
+/*
+ * Read one 16-bit register and hand it back in CPU byte order.
+ *
+ * *val is only written when the underlying burst read succeeds; the burst
+ * read's return value is passed through unchanged.
+ */
+static int iqs7211_read_word(struct iqs7211_private *iqs7211, u8 reg, u16 *val)
+{
+       __le16 raw;
+       int ret = iqs7211_read_burst(iqs7211, reg, &raw, sizeof(raw));
+
+       if (!ret)
+               *val = le16_to_cpu(raw);
+
+       return ret;
+}
+
+/*
+ * Write val_len bytes from val to the device starting at register reg.
+ *
+ * The register address and payload are copied into one temporary buffer so
+ * they go out in a single i2c_master_send() call. Up to IQS7211_NUM_RETRIES
+ * attempts are made, each preceded by iqs7211_force_comms(). Returns 0 on
+ * success, -ENOMEM if the temporary buffer cannot be allocated (not logged),
+ * or the negative error code left by the final attempt (logged).
+ */
+static int iqs7211_write_burst(struct iqs7211_private *iqs7211,
+                              u8 reg, const void *val, u16 val_len)
+{
+       int msg_len = sizeof(reg) + val_len;
+       int ret, i;
+       struct i2c_client *client = iqs7211->client;
+       u8 *msg_buf;
+
+       msg_buf = kzalloc(msg_len, GFP_KERNEL);
+       if (!msg_buf)
+               return -ENOMEM;
+
+       /* Layout: one address byte followed by the caller's payload. */
+       *msg_buf = reg;
+       memcpy(msg_buf + sizeof(reg), val, val_len);
+
+       /*
+        * The following loop protects against an edge case in which the RDY
+        * pin is automatically asserted just before the force communication
+        * command is sent.
+        *
+        * In that case, the subsequent I2C stop condition tricks the device
+        * into preemptively deasserting the RDY pin and the command must be
+        * sent again.
+        */
+       for (i = 0; i < IQS7211_NUM_RETRIES; i++) {
+               ret = iqs7211_force_comms(iqs7211);
+               if (ret < 0)
+                       continue;
+
+               /* A short write is reported as -EIO. */
+               ret = i2c_master_send(client, msg_buf, msg_len);
+               if (ret < msg_len) {
+                       if (ret >= 0)
+                               ret = -EIO;
+
+                       msleep(IQS7211_COMMS_RETRY_MS);
+                       continue;
+               }
+
+               ret = 0;
+               break;
+       }
+
+       kfree(msg_buf);
+
+       /* Called on both the success and the failure path. */
+       iqs7211_irq_wait();
+
+       if (ret < 0)
+               dev_err(&client->dev,
+                       "Failed to write to address 0x%02X: %d\n", reg, ret);
+
+       return ret;
+}
+
+/*
+ * Write one 16-bit register; val is given in CPU byte order and converted to
+ * little-endian before being passed to the burst write, whose return value
+ * is passed through.
+ */
+static int iqs7211_write_word(struct iqs7211_private *iqs7211, u8 reg, u16 val)
+{
+       __le16 raw;
+
+       raw = cpu_to_le16(val);
+
+       return iqs7211_write_burst(iqs7211, reg, &raw, sizeof(raw));
+}
+
+/*
+ * Establish communication with the device.
+ *
+ * In order: pick the initial communication mode from firmware properties,
+ * hard-reset the device, read and verify the product number, program the
+ * communication setup word, read back the exp_file and tp_config blocks,
+ * and finally derive the cached event mask from the setup word.
+ *
+ * Returns 0 on success or a negative error code from the first step that
+ * fails.
+ */
+static int iqs7211_start_comms(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       bool forced_comms;
+       unsigned int val;
+       u16 comms_setup;
+       int error;
+
+       /*
+        * Until forced communication can be enabled, the host must wait for a
+        * communication window each time it intends to elicit a response from
+        * the device.
+        *
+        * Forced communication is not necessary, however, if the host adapter
+        * can support clock stretching. In that case, the device freely clock
+        * stretches until all pending conversions are complete.
+        */
+       forced_comms = device_property_present(&client->dev,
+                                              "azoteq,forced-comms");
+
+       /*
+        * An -EINVAL result is not treated as a failure; the initial mode
+        * simply falls back to waiting for a communication window. When the
+        * property is readable, the initial mode depends on whether its value
+        * agrees with the requested mode.
+        */
+       error = device_property_read_u32(&client->dev,
+                                        "azoteq,forced-comms-default", &val);
+       if (error == -EINVAL) {
+               iqs7211->comms_init = IQS7211_COMMS_MODE_WAIT;
+       } else if (error) {
+               dev_err(&client->dev,
+                       "Failed to read default communication mode: %d\n",
+                       error);
+               return error;
+       } else if (val) {
+               iqs7211->comms_init = forced_comms ? IQS7211_COMMS_MODE_FORCE
+                                                  : IQS7211_COMMS_MODE_WAIT;
+       } else {
+               iqs7211->comms_init = forced_comms ? IQS7211_COMMS_MODE_WAIT
+                                                  : IQS7211_COMMS_MODE_FREE;
+       }
+
+       iqs7211->comms_mode = iqs7211->comms_init;
+
+       error = iqs7211_hard_reset(iqs7211);
+       if (error) {
+               dev_err(&client->dev, "Failed to reset device: %d\n", error);
+               return error;
+       }
+
+       error = iqs7211_read_burst(iqs7211, IQS7211_PROD_NUM,
+                                  &iqs7211->ver_info,
+                                  sizeof(iqs7211->ver_info));
+       if (error)
+               return error;
+
+       /* Refuse to continue if the part does not match the descriptor. */
+       if (le16_to_cpu(iqs7211->ver_info.prod_num) != dev_desc->prod_num) {
+               dev_err(&client->dev, "Invalid product number: %u\n",
+                       le16_to_cpu(iqs7211->ver_info.prod_num));
+               return -EINVAL;
+       }
+
+       error = iqs7211_read_word(iqs7211, dev_desc->sys_ctrl + 1,
+                                 &comms_setup);
+       if (error)
+               return error;
+
+       if (forced_comms)
+               comms_setup |= dev_desc->comms_req;
+       else
+               comms_setup &= ~dev_desc->comms_req;
+
+       /*
+        * This write additionally sets comms_end; the setup word without
+        * comms_end is written back once the reads below have completed.
+        */
+       error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+                                  comms_setup | dev_desc->comms_end);
+       if (error)
+               return error;
+
+       /* From here on the driver uses the mode that was just programmed. */
+       if (forced_comms)
+               iqs7211->comms_mode = IQS7211_COMMS_MODE_FORCE;
+       else
+               iqs7211->comms_mode = IQS7211_COMMS_MODE_FREE;
+
+       error = iqs7211_read_burst(iqs7211, dev_desc->exp_file,
+                                  iqs7211->exp_file,
+                                  sizeof(iqs7211->exp_file));
+       if (error)
+               return error;
+
+       error = iqs7211_read_burst(iqs7211, dev_desc->tp_config,
+                                  &iqs7211->tp_config,
+                                  sizeof(iqs7211->tp_config));
+       if (error)
+               return error;
+
+       error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+                                  comms_setup);
+       if (error)
+               return error;
+
+       /*
+        * Seed the cached event mask from the setup word: clear every bit in
+        * IQS7211_EVENT_MASK_ALL, then set the ATI event and event mode bits.
+        */
+       iqs7211->event_mask = comms_setup & ~IQS7211_EVENT_MASK_ALL;
+       iqs7211->event_mask |= (IQS7211_EVENT_MASK_ATI | IQS7211_EVENT_MODE);
+
+       return 0;
+}
+
+/*
+ * Push the driver's cached configuration to the device.
+ *
+ * In order: acknowledge reset and write the event mask, select the
+ * communication mode implied by the event mask, write every queued register
+ * field (read-modify-write for partial masks), write the trackpad
+ * configuration and, if a cycle allocation has been prepared, the Rx/Tx map
+ * and cycle allocation blocks. The last write places ati_start in the first
+ * system control word.
+ *
+ * Returns 0 on success or the negative error code of the first failed
+ * register access.
+ */
+static int iqs7211_init_device(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct iqs7211_reg_field_desc *reg_field;
+       __le16 sys_ctrl[] = {
+               cpu_to_le16(dev_desc->ack_reset),
+               cpu_to_le16(iqs7211->event_mask),
+       };
+       int error, i;
+
+       /*
+        * Acknowledge reset before writing any registers in case the device
+        * suffers a spurious reset during initialization. The communication
+        * mode is configured at this time as well.
+        */
+       error = iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, sys_ctrl,
+                                   sizeof(sys_ctrl));
+       if (error)
+               return error;
+
+       if (iqs7211->event_mask & dev_desc->comms_req)
+               iqs7211->comms_mode = IQS7211_COMMS_MODE_FORCE;
+       else
+               iqs7211->comms_mode = IQS7211_COMMS_MODE_FREE;
+
+       /*
+        * Take advantage of the stop-bit disable function, if available, to
+        * save the trouble of having to reopen a communication window after
+        * each read or write.
+        */
+       error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+                                  iqs7211->event_mask | dev_desc->comms_end);
+       if (error)
+               return error;
+
+       list_for_each_entry(reg_field, &iqs7211->reg_field_head, list) {
+               u16 new_val = reg_field->val;
+
+               /*
+                * A mask that does not cover the whole word requires a
+                * read-modify-write; the write is skipped if nothing changes.
+                */
+               if (reg_field->mask < U16_MAX) {
+                       u16 old_val;
+
+                       error = iqs7211_read_word(iqs7211, reg_field->addr,
+                                                 &old_val);
+                       if (error)
+                               return error;
+
+                       new_val = old_val & ~reg_field->mask;
+                       new_val |= reg_field->val;
+
+                       if (new_val == old_val)
+                               continue;
+               }
+
+               error = iqs7211_write_word(iqs7211, reg_field->addr, new_val);
+               if (error)
+                       return error;
+       }
+
+       error = iqs7211_write_burst(iqs7211, dev_desc->tp_config,
+                                   &iqs7211->tp_config,
+                                   sizeof(iqs7211->tp_config));
+       if (error)
+               return error;
+
+       /* A zero first byte means no cycle allocation was prepared. */
+       if (**iqs7211->cycle_alloc) {
+               error = iqs7211_write_burst(iqs7211, dev_desc->rx_tx_map,
+                                           &iqs7211->rx_tx_map,
+                                           dev_desc->num_ctx);
+               if (error)
+                       return error;
+
+               /*
+                * Iterate over the number of cycle groups, not the size of
+                * the table in bytes, so the bound stays correct regardless
+                * of the element type of cycle_limit. Each cycle occupies
+                * three bytes in the allocation block.
+                */
+               for (i = 0; i < ARRAY_SIZE(dev_desc->cycle_limit); i++) {
+                       error = iqs7211_write_burst(iqs7211,
+                                                   dev_desc->cycle_alloc[i],
+                                                   iqs7211->cycle_alloc[i],
+                                                   dev_desc->cycle_limit[i] * 3);
+                       if (error)
+                               return error;
+               }
+       }
+
+       /* Reuse the buffer: only the first word changes to ati_start. */
+       *sys_ctrl = cpu_to_le16(iqs7211->ati_start);
+
+       return iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, sys_ctrl,
+                                  sizeof(sys_ctrl));
+}
+
+/*
+ * Queue a register field for a later write to the device.
+ *
+ * A field whose address is zero is silently ignored. If a field with the
+ * same address is already queued, the new mask and value are OR-ed into it;
+ * otherwise a new device-managed entry is allocated and added to the list.
+ *
+ * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
+ */
+static int iqs7211_add_field(struct iqs7211_private *iqs7211,
+                            struct iqs7211_reg_field_desc new_field)
+{
+       struct iqs7211_reg_field_desc *entry;
+
+       if (!new_field.addr)
+               return 0;
+
+       /* Merge into an existing entry for this address, if there is one. */
+       list_for_each_entry(entry, &iqs7211->reg_field_head, list) {
+               if (entry->addr == new_field.addr) {
+                       entry->mask |= new_field.mask;
+                       entry->val |= new_field.val;
+                       return 0;
+               }
+       }
+
+       entry = devm_kzalloc(&iqs7211->client->dev, sizeof(*entry),
+                            GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       entry->addr = new_field.addr;
+       entry->mask = new_field.mask;
+       entry->val = new_field.val;
+
+       list_add(&entry->list, &iqs7211->reg_field_head);
+
+       return 0;
+}
+
+/*
+ * Translate the firmware properties of one register group into queued
+ * register fields.
+ *
+ * Every entry of iqs7211_props whose reg_key matches and which has a nonzero
+ * register address for this group and device is looked up in reg_grp_node.
+ * Properties that are absent (-EINVAL) are skipped; present values are range
+ * checked, scaled by the entry's pitch and queued via iqs7211_add_field().
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range value, or the error from
+ * the property read or from iqs7211_add_field().
+ */
+static int iqs7211_parse_props(struct iqs7211_private *iqs7211,
+                              struct fwnode_handle *reg_grp_node,
+                              enum iqs7211_reg_grp_id reg_grp,
+                              enum iqs7211_reg_key_id reg_key)
+{
+       struct i2c_client *client = iqs7211->client;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(iqs7211_props); i++) {
+               const char *name = iqs7211_props[i].name;
+               /* Second index is this device's position in iqs7211_devs. */
+               u8 reg_addr = iqs7211_props[i].reg_addr[reg_grp]
+                                                      [iqs7211->dev_desc -
+                                                       iqs7211_devs];
+               int reg_shift = iqs7211_props[i].reg_shift;
+               /* Zero table entries select the defaults given after ?: */
+               int reg_width = iqs7211_props[i].reg_width ? : 16;
+               int val_pitch = iqs7211_props[i].val_pitch ? : 1;
+               int val_min = iqs7211_props[i].val_min;
+               int val_max = iqs7211_props[i].val_max;
+               const char *label = iqs7211_props[i].label ? : name;
+               struct iqs7211_reg_field_desc reg_field;
+               unsigned int val;
+               int error;
+
+               if (iqs7211_props[i].reg_key != reg_key)
+                       continue;
+
+               if (!reg_addr)
+                       continue;
+
+               error = fwnode_property_read_u32(reg_grp_node, name, &val);
+               if (error == -EINVAL) {
+                       continue;
+               } else if (error) {
+                       dev_err(&client->dev, "Failed to read %s %s: %d\n",
+                               fwnode_get_name(reg_grp_node), label, error);
+                       return error;
+               }
+
+               /* Without an explicit maximum, use the largest field value. */
+               if (!val_max)
+                       val_max = GENMASK(reg_width - 1, 0) * val_pitch;
+
+               /*
+                * NOTE(review): val is unsigned while val_min and val_max are
+                * int, so both comparisons are carried out as unsigned; this
+                * presumably relies on the table holding only non-negative
+                * limits -- confirm.
+                */
+               if (val < val_min || val > val_max) {
+                       dev_err(&client->dev, "Invalid %s: %u\n", label, val);
+                       return -EINVAL;
+               }
+
+               reg_field.addr = reg_addr;
+               reg_field.mask = GENMASK(reg_shift + reg_width - 1, reg_shift);
+               reg_field.val = val / val_pitch << reg_shift;
+
+               error = iqs7211_add_field(iqs7211, reg_field);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+/*
+ * Parse one event node: its generic properties, the optional gesture angle
+ * (axial X and Y events only) and the optional key code.
+ *
+ * A missing "azoteq,gesture-angle" or "linux,code" property (-EINVAL) is not
+ * an error; in the latter case *event_code is left to the property helper.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range gesture angle, or the
+ * error reported by the failing helper.
+ */
+static int iqs7211_parse_event(struct iqs7211_private *iqs7211,
+                              struct fwnode_handle *event_node,
+                              enum iqs7211_reg_grp_id reg_grp,
+                              enum iqs7211_reg_key_id reg_key,
+                              unsigned int *event_code)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       struct iqs7211_reg_field_desc reg_field;
+       unsigned int angle;
+       int ret;
+
+       ret = iqs7211_parse_props(iqs7211, event_node, reg_grp, reg_key);
+       if (ret)
+               return ret;
+
+       if (reg_key == IQS7211_REG_KEY_AXIAL_X ||
+           reg_key == IQS7211_REG_KEY_AXIAL_Y) {
+               ret = fwnode_property_read_u32(event_node,
+                                              "azoteq,gesture-angle", &angle);
+               if (ret && ret != -EINVAL) {
+                       dev_err(&client->dev,
+                               "Failed to read %s gesture angle: %d\n",
+                               fwnode_get_name(event_node), ret);
+                       return ret;
+               }
+
+               /* Only a property that is actually present gets queued. */
+               if (!ret) {
+                       if (angle >= ARRAY_SIZE(iqs7211_gesture_angle)) {
+                               dev_err(&client->dev,
+                                       "Invalid %s gesture angle: %u\n",
+                                       fwnode_get_name(event_node), angle);
+                               return -EINVAL;
+                       }
+
+                       reg_field.addr = dev_desc->gesture_angle;
+                       reg_field.mask = U8_MAX;
+                       reg_field.val = iqs7211_gesture_angle[angle];
+
+                       ret = iqs7211_add_field(iqs7211, reg_field);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       ret = fwnode_property_read_u32(event_node, "linux,code", event_code);
+       if (!ret || ret == -EINVAL)
+               return 0;
+
+       dev_err(&client->dev, "Failed to read %s code: %d\n",
+               fwnode_get_name(event_node), ret);
+
+       return ret;
+}
+
+/*
+ * Derive the sensing cycle allocation for the trackpad.
+ *
+ * If "azoteq,channel-select" is absent, channels are assigned to cycle slots
+ * automatically from the Rx/Tx pin lists already stored in iqs7211; if it is
+ * present, its entries are validated (range and duplicates) and used as-is.
+ * The resulting per-cycle slot assignments are then packed, three bytes per
+ * cycle, into iqs7211->cycle_alloc.
+ *
+ * Returns 0 on success, -EINVAL for an invalid or oversized allocation, or
+ * the error reported by the property helpers.
+ */
+static int iqs7211_parse_cycles(struct iqs7211_private *iqs7211,
+                               struct fwnode_handle *tp_node)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       int num_cycles = dev_desc->cycle_limit[0] + dev_desc->cycle_limit[1];
+       int error, count, i, j, k, cycle_start;
+       unsigned int cycle_alloc[IQS7211_MAX_CYCLES][2];
+       u8 total_rx = iqs7211->tp_config.total_rx;
+       u8 total_tx = iqs7211->tp_config.total_tx;
+
+       /* Mark every slot of every cycle as unused. */
+       for (i = 0; i < IQS7211_MAX_CYCLES; i++) {
+               cycle_alloc[i][0] = U8_MAX;
+               cycle_alloc[i][1] = U8_MAX;
+       }
+
+       count = fwnode_property_count_u32(tp_node, "azoteq,channel-select");
+       if (count == -EINVAL) {
+               /*
+                * Assign each sensing cycle's slots (0 and 1) to a channel,
+                * defined as the intersection between two CRx and CTx pins.
+                * A channel assignment of 255 means the slot is unused.
+                */
+               for (i = 0, cycle_start = 0; i < total_tx; i++) {
+                       int cycle_stop = 0;
+
+                       for (j = 0; j < total_rx; j++) {
+                               /*
+                                * Channels formed by CRx0-3 and CRx4-7 are
+                                * bound to slots 0 and 1, respectively.
+                                */
+                               int slot = iqs7211->rx_tx_map[j] < 4 ? 0 : 1;
+                               int chan = i * total_rx + j;
+
+                               /* Claim the first free cycle for this slot. */
+                               for (k = cycle_start; k < num_cycles; k++) {
+                                       if (cycle_alloc[k][slot] < U8_MAX)
+                                               continue;
+
+                                       cycle_alloc[k][slot] = chan;
+                                       break;
+                               }
+
+                               if (k < num_cycles) {
+                                       cycle_stop = max(k, cycle_stop);
+                                       continue;
+                               }
+
+                               dev_err(&client->dev,
+                                       "Insufficient number of cycles\n");
+                               return -EINVAL;
+                       }
+
+                       /*
+                        * Sensing cycles cannot straddle more than one CTx
+                        * pin. As such, the next row's starting cycle must
+                        * be greater than the previous row's highest cycle.
+                        */
+                       cycle_start = cycle_stop + 1;
+               }
+       } else if (count < 0) {
+               dev_err(&client->dev, "Failed to count channels: %d\n", count);
+               return count;
+       } else if (count > num_cycles * 2) {
+               dev_err(&client->dev, "Insufficient number of cycles\n");
+               return -EINVAL;
+       } else if (count > 0) {
+               /*
+                * The property is a flat list of slot assignments, two per
+                * cycle, so entry n lands in cycle_alloc[n / 2][n % 2].
+                */
+               error = fwnode_property_read_u32_array(tp_node,
+                                                      "azoteq,channel-select",
+                                                      cycle_alloc[0], count);
+               if (error) {
+                       dev_err(&client->dev, "Failed to read channels: %d\n",
+                               error);
+                       return error;
+               }
+
+               for (i = 0; i < count; i++) {
+                       int chan = cycle_alloc[i / 2][i % 2];
+
+                       if (chan == U8_MAX)
+                               continue;
+
+                       if (chan >= total_rx * total_tx) {
+                               dev_err(&client->dev, "Invalid channel: %d\n",
+                                       chan);
+                               return -EINVAL;
+                       }
+
+                       /* Each channel may appear in at most one slot. */
+                       for (j = 0; j < count; j++) {
+                               if (j == i || cycle_alloc[j / 2][j % 2] != chan)
+                                       continue;
+
+                               dev_err(&client->dev, "Duplicate channel: %d\n",
+                                       chan);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       /*
+        * Once the raw channel assignments have been derived, they must be
+        * packed according to the device's register map.
+        *
+        * The outer bound is the number of cycle groups rather than the size
+        * of cycle_limit in bytes, so it remains correct for any element type.
+        */
+       for (i = 0, cycle_start = 0; i < ARRAY_SIZE(dev_desc->cycle_limit); i++) {
+               int offs = 0;
+
+               for (j = cycle_start;
+                    j < cycle_start + dev_desc->cycle_limit[i]; j++) {
+                       iqs7211->cycle_alloc[i][offs++] = 0x05;
+                       iqs7211->cycle_alloc[i][offs++] = cycle_alloc[j][0];
+                       iqs7211->cycle_alloc[i][offs++] = cycle_alloc[j][1];
+               }
+
+               cycle_start += dev_desc->cycle_limit[i];
+       }
+
+       return 0;
+}
+
+/*
+ * Parse the trackpad's CRx and CTx pin lists and build the Rx/Tx map.
+ *
+ * If "azoteq,rx-enable" is absent the function returns 0 without touching
+ * the configuration. Otherwise the CRx pins are stored at the start of
+ * rx_tx_map, the CTx pins directly after them, the totals are recorded in
+ * tp_config, and the sensing cycles are derived by iqs7211_parse_cycles().
+ *
+ * Returns 0 on success, -EINVAL for an invalid pin or pin count, or the
+ * error reported by the property helpers or iqs7211_parse_cycles().
+ */
+static int iqs7211_parse_tp(struct iqs7211_private *iqs7211,
+                           struct fwnode_handle *tp_node)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       unsigned int pins[IQS7211_MAX_CTX];
+       int error, count, i, j;
+
+       count = fwnode_property_count_u32(tp_node, "azoteq,rx-enable");
+       if (count == -EINVAL) {
+               return 0;
+       } else if (count < 0) {
+               dev_err(&client->dev, "Failed to count CRx pins: %d\n", count);
+               return count;
+       } else if (count > IQS7211_NUM_CRX) {
+               dev_err(&client->dev, "Invalid number of CRx pins\n");
+               return -EINVAL;
+       }
+
+       error = fwnode_property_read_u32_array(tp_node, "azoteq,rx-enable",
+                                              pins, count);
+       if (error) {
+               dev_err(&client->dev, "Failed to read CRx pins: %d\n", error);
+               return error;
+       }
+
+       for (i = 0; i < count; i++) {
+               if (pins[i] >= IQS7211_NUM_CRX) {
+                       dev_err(&client->dev, "Invalid CRx pin: %u\n", pins[i]);
+                       return -EINVAL;
+               }
+
+               iqs7211->rx_tx_map[i] = pins[i];
+       }
+
+       iqs7211->tp_config.total_rx = count;
+
+       count = fwnode_property_count_u32(tp_node, "azoteq,tx-enable");
+       if (count < 0) {
+               dev_err(&client->dev, "Failed to count CTx pins: %d\n", count);
+               return count;
+       } else if (count > dev_desc->num_ctx) {
+               dev_err(&client->dev, "Invalid number of CTx pins\n");
+               return -EINVAL;
+       } else if (iqs7211->tp_config.total_rx + count > dev_desc->num_ctx) {
+               /*
+                * CTx pins are appended to rx_tx_map after the CRx pins, and
+                * only num_ctx map entries are ever written to the device.
+                * Because repeated pins in either list are not rejected, the
+                * combined total must be bounded here so the stores below
+                * cannot run past the map.
+                */
+               dev_err(&client->dev, "Invalid number of CRx and CTx pins\n");
+               return -EINVAL;
+       }
+
+       error = fwnode_property_read_u32_array(tp_node, "azoteq,tx-enable",
+                                              pins, count);
+       if (error) {
+               dev_err(&client->dev, "Failed to read CTx pins: %d\n", error);
+               return error;
+       }
+
+       for (i = 0; i < count; i++) {
+               if (pins[i] >= dev_desc->num_ctx) {
+                       dev_err(&client->dev, "Invalid CTx pin: %u\n", pins[i]);
+                       return -EINVAL;
+               }
+
+               /* A pin already claimed as CRx cannot also be used as CTx. */
+               for (j = 0; j < iqs7211->tp_config.total_rx; j++) {
+                       if (iqs7211->rx_tx_map[j] != pins[i])
+                               continue;
+
+                       dev_err(&client->dev, "Conflicting CTx pin: %u\n",
+                               pins[i]);
+                       return -EINVAL;
+               }
+
+               iqs7211->rx_tx_map[iqs7211->tp_config.total_rx + i] = pins[i];
+       }
+
+       iqs7211->tp_config.total_tx = count;
+
+       return iqs7211_parse_cycles(iqs7211, tp_node);
+}
+
+/*
+ * Parse the optional CRx and CTx pin lists of the ALP node and queue one
+ * bitmask register field for each list that is present.
+ *
+ * For either list, a count of -EINVAL matches none of the branches below, so
+ * a missing property is skipped without error.
+ *
+ * Returns 0 on success, -EINVAL for an invalid pin or pin count, or the
+ * error reported by the property helpers or iqs7211_add_field().
+ */
+static int iqs7211_parse_alp(struct iqs7211_private *iqs7211,
+                            struct fwnode_handle *alp_node)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       struct iqs7211_reg_field_desc reg_field;
+       int error, count, i;
+
+       count = fwnode_property_count_u32(alp_node, "azoteq,rx-enable");
+       if (count < 0 && count != -EINVAL) {
+               dev_err(&client->dev, "Failed to count CRx pins: %d\n", count);
+               return count;
+       } else if (count > IQS7211_NUM_CRX) {
+               dev_err(&client->dev, "Invalid number of CRx pins\n");
+               return -EINVAL;
+       } else if (count >= 0) {
+               unsigned int pins[IQS7211_NUM_CRX];
+
+               error = fwnode_property_read_u32_array(alp_node,
+                                                      "azoteq,rx-enable",
+                                                      pins, count);
+               if (error) {
+                       dev_err(&client->dev, "Failed to read CRx pins: %d\n",
+                               error);
+                       return error;
+               }
+
+               /* One bit per CRx pin; listed pins are set, the rest cleared. */
+               reg_field.addr = dev_desc->alp_config;
+               reg_field.mask = GENMASK(IQS7211_NUM_CRX - 1, 0);
+               reg_field.val = 0;
+
+               for (i = 0; i < count; i++) {
+                       if (pins[i] < dev_desc->min_crx_alp ||
+                           pins[i] >= IQS7211_NUM_CRX) {
+                               dev_err(&client->dev, "Invalid CRx pin: %u\n",
+                                       pins[i]);
+                               return -EINVAL;
+                       }
+
+                       reg_field.val |= BIT(pins[i]);
+               }
+
+               error = iqs7211_add_field(iqs7211, reg_field);
+               if (error)
+                       return error;
+       }
+
+       count = fwnode_property_count_u32(alp_node, "azoteq,tx-enable");
+       if (count < 0 && count != -EINVAL) {
+               dev_err(&client->dev, "Failed to count CTx pins: %d\n", count);
+               return count;
+       } else if (count > dev_desc->num_ctx) {
+               dev_err(&client->dev, "Invalid number of CTx pins\n");
+               return -EINVAL;
+       } else if (count >= 0) {
+               unsigned int pins[IQS7211_MAX_CTX];
+
+               error = fwnode_property_read_u32_array(alp_node,
+                                                      "azoteq,tx-enable",
+                                                      pins, count);
+               if (error) {
+                       dev_err(&client->dev, "Failed to read CTx pins: %d\n",
+                               error);
+                       return error;
+               }
+
+               /* The CTx bitmask goes to the register after alp_config. */
+               reg_field.addr = dev_desc->alp_config + 1;
+               reg_field.mask = GENMASK(dev_desc->num_ctx - 1, 0);
+               reg_field.val = 0;
+
+               for (i = 0; i < count; i++) {
+                       if (pins[i] >= dev_desc->num_ctx) {
+                               dev_err(&client->dev, "Invalid CTx pin: %u\n",
+                                       pins[i]);
+                               return -EINVAL;
+                       }
+
+                       reg_field.val |= BIT(pins[i]);
+               }
+
+               error = iqs7211_add_field(iqs7211, reg_field);
+               if (error)
+                       return error;
+       }
+
+       return 0;
+}
+
+/*
+ * Optional group-specific parsers, indexed by register group. Groups with no
+ * entry here are left NULL, which iqs7211_parse_reg_grp() checks for before
+ * calling.
+ */
+static int (*iqs7211_parse_extra[IQS7211_NUM_REG_GRPS])
+                               (struct iqs7211_private *iqs7211,
+                                struct fwnode_handle *reg_grp_node) = {
+       [IQS7211_REG_GRP_TP] = iqs7211_parse_tp,
+       [IQS7211_REG_GRP_ALP] = iqs7211_parse_alp,
+};
+
+/*
+ * Parse one register group node.
+ *
+ * Handles the group's generic properties, runs the group-specific parser if
+ * one is registered in iqs7211_parse_extra, accumulates the group's
+ * ati_start bits, and walks the device's keypad events belonging to this
+ * group. The enable bits of every such event go into the field's mask; only
+ * events whose node exists and parses cleanly also go into its value.
+ *
+ * Returns 0 on success or the first error reported by a helper.
+ */
+static int iqs7211_parse_reg_grp(struct iqs7211_private *iqs7211,
+                                struct fwnode_handle *reg_grp_node,
+                                enum iqs7211_reg_grp_id reg_grp)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct iqs7211_reg_field_desc reg_field;
+       int error, i;
+
+       error = iqs7211_parse_props(iqs7211, reg_grp_node, reg_grp,
+                                   IQS7211_REG_KEY_NONE);
+       if (error)
+               return error;
+
+       if (iqs7211_parse_extra[reg_grp]) {
+               error = iqs7211_parse_extra[reg_grp](iqs7211, reg_grp_node);
+               if (error)
+                       return error;
+       }
+
+       iqs7211->ati_start |= dev_desc->ati_start[reg_grp];
+
+       reg_field.addr = dev_desc->kp_enable[reg_grp];
+       reg_field.mask = 0;
+       reg_field.val = 0;
+
+       for (i = 0; i < dev_desc->num_kp_events; i++) {
+               const char *event_name = dev_desc->kp_events[i].name;
+               struct fwnode_handle *event_node;
+
+               if (dev_desc->kp_events[i].reg_grp != reg_grp)
+                       continue;
+
+               reg_field.mask |= dev_desc->kp_events[i].enable;
+
+               /*
+                * Named events live in a child node; unnamed events use the
+                * group node itself. Either way a reference is taken here
+                * and dropped again right after parsing.
+                */
+               if (event_name)
+                       event_node = fwnode_get_named_child_node(reg_grp_node,
+                                                                event_name);
+               else
+                       event_node = fwnode_handle_get(reg_grp_node);
+
+               if (!event_node)
+                       continue;
+
+               error = iqs7211_parse_event(iqs7211, event_node,
+                                           dev_desc->kp_events[i].reg_grp,
+                                           dev_desc->kp_events[i].reg_key,
+                                           &iqs7211->kp_code[i]);
+               /* Drop the reference before acting on the result. */
+               fwnode_handle_put(event_node);
+               if (error)
+                       return error;
+
+               reg_field.val |= dev_desc->kp_events[i].enable;
+
+               iqs7211->event_mask |= iqs7211_reg_grp_masks[reg_grp];
+       }
+
+       return iqs7211_add_field(iqs7211, reg_field);
+}
+
+/*
+ * Register the keypad input device, if any keypad event has a key code.
+ *
+ * When no entry of kp_code is nonzero, nothing is allocated and 0 is
+ * returned. Otherwise a device-managed input device is allocated, stored in
+ * iqs7211->kp_idev, given an EV_KEY capability for each nonzero code, and
+ * registered.
+ *
+ * Returns 0 on success, -ENOMEM if allocation fails, or the error reported
+ * by input_register_device() (which is also logged).
+ */
+static int iqs7211_register_kp(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       struct input_dev *idev;
+       bool have_code = false;
+       int ret, i;
+
+       for (i = 0; i < dev_desc->num_kp_events; i++) {
+               if (iqs7211->kp_code[i]) {
+                       have_code = true;
+                       break;
+               }
+       }
+
+       if (!have_code)
+               return 0;
+
+       idev = devm_input_allocate_device(&client->dev);
+       if (!idev)
+               return -ENOMEM;
+
+       iqs7211->kp_idev = idev;
+
+       idev->name = dev_desc->kp_name;
+       idev->id.bustype = BUS_I2C;
+
+       for (i = 0; i < dev_desc->num_kp_events; i++) {
+               if (!iqs7211->kp_code[i])
+                       continue;
+
+               input_set_capability(idev, EV_KEY, iqs7211->kp_code[i]);
+       }
+
+       ret = input_register_device(idev);
+       if (ret)
+               dev_err(&client->dev, "Failed to register %s: %d\n",
+                       idev->name, ret);
+
+       return ret;
+}
+
+/*
+ * Allocate and register the multi-touch input device.
+ *
+ * The number of contacts is read from the "azoteq,num-contacts" property.
+ * No device is created, and 0 is returned, when the read yields -EINVAL or
+ * when the property value is zero. Returns 0 on success or a negative error
+ * code if the property is invalid, the configured size is out of range, or
+ * allocation, slot initialization or registration fails.
+ */
+static int iqs7211_register_tp(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct touchscreen_properties *prop = &iqs7211->prop;
+       struct input_dev *tp_idev = iqs7211->tp_idev;
+       struct i2c_client *client = iqs7211->client;
+       int error;
+
+       /*
+        * -EINVAL is not treated as a failure here; presumably it is what the
+        * property API returns for an absent property -- TODO confirm.
+        */
+       error = device_property_read_u32(&client->dev, "azoteq,num-contacts",
+                                        &iqs7211->num_contacts);
+       if (error == -EINVAL) {
+               return 0;
+       } else if (error) {
+               dev_err(&client->dev, "Failed to read number of contacts: %d\n",
+                       error);
+               return error;
+       } else if (iqs7211->num_contacts > IQS7211_MAX_CONTACTS) {
+               dev_err(&client->dev, "Invalid number of contacts: %u\n",
+                       iqs7211->num_contacts);
+               return -EINVAL;
+       }
+
+       /* tp_config always carries at least one contact, even if zero were requested. */
+       iqs7211->tp_config.num_contacts = iqs7211->num_contacts ? : 1;
+
+       /* Zero contacts requested: no touch input device is created. */
+       if (!iqs7211->num_contacts)
+               return 0;
+
+       iqs7211->event_mask |= IQS7211_EVENT_MASK_MOVE;
+
+       tp_idev = devm_input_allocate_device(&client->dev);
+       if (!tp_idev)
+               return -ENOMEM;
+
+       iqs7211->tp_idev = tp_idev;
+
+       tp_idev->name = dev_desc->tp_name;
+       tp_idev->id.bustype = BUS_I2C;
+
+       /* Initial axis limits come from the values currently held in tp_config. */
+       input_set_abs_params(tp_idev, ABS_MT_POSITION_X,
+                            0, le16_to_cpu(iqs7211->tp_config.max_x), 0, 0);
+
+       input_set_abs_params(tp_idev, ABS_MT_POSITION_Y,
+                            0, le16_to_cpu(iqs7211->tp_config.max_y), 0, 0);
+
+       input_set_abs_params(tp_idev, ABS_MT_PRESSURE, 0, U16_MAX, 0, 0);
+
+       /*
+        * NOTE(review): presumably this fills *prop and lets firmware
+        * properties override the limits set above -- confirm.
+        */
+       touchscreen_parse_properties(tp_idev, true, prop);
+
+       /*
+        * The device reserves 0xFFFF for coordinates that correspond to slots
+        * which are not in a state of touch.
+        */
+       if (prop->max_x >= U16_MAX || prop->max_y >= U16_MAX) {
+               dev_err(&client->dev, "Invalid trackpad size: %u*%u\n",
+                       prop->max_x, prop->max_y);
+               return -EINVAL;
+       }
+
+       /* Store the resulting limits back into tp_config in little-endian form. */
+       iqs7211->tp_config.max_x = cpu_to_le16(prop->max_x);
+       iqs7211->tp_config.max_y = cpu_to_le16(prop->max_y);
+
+       error = input_mt_init_slots(tp_idev, iqs7211->num_contacts,
+                                   INPUT_MT_DIRECT);
+       if (error) {
+               dev_err(&client->dev, "Failed to initialize slots: %d\n",
+                       error);
+               return error;
+       }
+
+       error = input_register_device(tp_idev);
+       if (error)
+               dev_err(&client->dev, "Failed to register %s: %d\n",
+                       tp_idev->name, error);
+
+       return error;
+}
+
+/*
+ * Read the device's status block and translate it into input events.
+ *
+ * Touch contacts are reported on iqs7211->tp_idev and key/gesture events on
+ * iqs7211->kp_idev; either part is skipped when it was not configured
+ * (num_contacts of zero, or a NULL kp_idev). If the status shows that the
+ * device reset itself, the device is re-initialized instead of reporting.
+ * Returns 0 on success (including after a logged ATI error), or the error
+ * from the failed read or re-initialization.
+ */
+static int iqs7211_report(struct iqs7211_private *iqs7211)
+{
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       struct i2c_client *client = iqs7211->client;
+       struct iqs7211_touch_data *touch_data;
+       u16 info_flags, charge_mode, gesture_flags;
+       __le16 status[12];
+       int error, i;
+
+       /*
+        * One burst read covers the status words that precede the contact
+        * data plus one touch_data record per configured contact.
+        *
+        * NOTE(review): the length is not checked against sizeof(status) in
+        * this function; presumably contact_offs and IQS7211_MAX_CONTACTS
+        * keep it within the 12 words available -- confirm.
+        */
+       error = iqs7211_read_burst(iqs7211, dev_desc->sys_stat, status,
+                                  dev_desc->contact_offs * sizeof(__le16) +
+                                  iqs7211->num_contacts * sizeof(*touch_data));
+       if (error)
+               return error;
+
+       info_flags = le16_to_cpu(status[dev_desc->info_offs]);
+
+       if (info_flags & dev_desc->show_reset) {
+               dev_err(&client->dev, "Unexpected device reset\n");
+
+               /*
+                * The device may or may not expect forced communication after
+                * it exits hardware reset, so the corresponding state machine
+                * must be reset as well.
+                */
+               iqs7211->comms_mode = iqs7211->comms_init;
+
+               return iqs7211_init_device(iqs7211);
+       }
+
+       /* Log the first flagged ATI error and drop this report without failing. */
+       for (i = 0; i < ARRAY_SIZE(dev_desc->ati_error); i++) {
+               if (!(info_flags & dev_desc->ati_error[i]))
+                       continue;
+
+               dev_err(&client->dev, "Unexpected %s ATI error\n",
+                       iqs7211_reg_grp_names[i]);
+               return 0;
+       }
+
+       /* Each contact maps to the MT slot of the same index. */
+       for (i = 0; i < iqs7211->num_contacts; i++) {
+               u16 pressure;
+
+               touch_data = (struct iqs7211_touch_data *)
+                            &status[dev_desc->contact_offs] + i;
+               pressure = le16_to_cpu(touch_data->pressure);
+
+               /* A nonzero pressure is what marks the slot as touched. */
+               input_mt_slot(iqs7211->tp_idev, i);
+               if (input_mt_report_slot_state(iqs7211->tp_idev, MT_TOOL_FINGER,
+                                              pressure != 0)) {
+                       touchscreen_report_pos(iqs7211->tp_idev, &iqs7211->prop,
+                                              le16_to_cpu(touch_data->abs_x),
+                                              le16_to_cpu(touch_data->abs_y),
+                                              true);
+                       input_report_abs(iqs7211->tp_idev, ABS_MT_PRESSURE,
+                                        pressure);
+               }
+       }
+
+       if (iqs7211->num_contacts) {
+               input_mt_sync_frame(iqs7211->tp_idev);
+               input_sync(iqs7211->tp_idev);
+       }
+
+       /* Everything below concerns the keypad device only. */
+       if (!iqs7211->kp_idev)
+               return 0;
+
+       /* The charging mode is a 3-bit field starting at charge_shift. */
+       charge_mode = info_flags & GENMASK(dev_desc->charge_shift + 2,
+                                          dev_desc->charge_shift);
+       charge_mode >>= dev_desc->charge_shift;
+
+       /*
+        * A charging mode higher than 2 (idle mode) indicates the device last
+        * operated in low-power mode and intends to express an ALP event.
+        */
+       /* The ALP event uses the first kp_events[] entry and the first key code. */
+       if (info_flags & dev_desc->kp_events->mask && charge_mode > 2) {
+               input_report_key(iqs7211->kp_idev, *iqs7211->kp_code, 1);
+               input_sync(iqs7211->kp_idev);
+
+               input_report_key(iqs7211->kp_idev, *iqs7211->kp_code, 0);
+       }
+
+       /* Button-group keys mirror their bit in info_flags directly. */
+       for (i = 0; i < dev_desc->num_kp_events; i++) {
+               if (dev_desc->kp_events[i].reg_grp != IQS7211_REG_GRP_BTN)
+                       continue;
+
+               input_report_key(iqs7211->kp_idev, iqs7211->kp_code[i],
+                                info_flags & dev_desc->kp_events[i].mask);
+       }
+
+       gesture_flags = le16_to_cpu(status[dev_desc->gesture_offs]);
+
+       for (i = 0; i < dev_desc->num_kp_events; i++) {
+               enum iqs7211_reg_key_id reg_key = dev_desc->kp_events[i].reg_key;
+               u16 mask = dev_desc->kp_events[i].mask;
+
+               if (dev_desc->kp_events[i].reg_grp != IQS7211_REG_GRP_TP)
+                       continue;
+
+               /* Only report a gesture whose bit differs from the cached state. */
+               if ((gesture_flags ^ iqs7211->gesture_cache) & mask)
+                       input_report_key(iqs7211->kp_idev, iqs7211->kp_code[i],
+                                        gesture_flags & mask);
+
+               iqs7211->gesture_cache &= ~mask;
+
+               /*
+                * Hold and palm gestures persist while the contact remains in
+                * place; all others are momentary and hence are followed by a
+                * complementary release event.
+                */
+               if (reg_key == IQS7211_REG_KEY_HOLD ||
+                   reg_key == IQS7211_REG_KEY_PALM) {
+                       iqs7211->gesture_cache |= gesture_flags & mask;
+                       gesture_flags &= ~mask;
+               }
+       }
+
+       /*
+        * Bits still set in gesture_flags were not consumed as hold/palm
+        * above: flush the press events, then queue the matching releases.
+        */
+       if (gesture_flags) {
+               input_sync(iqs7211->kp_idev);
+
+               for (i = 0; i < dev_desc->num_kp_events; i++)
+                       if (dev_desc->kp_events[i].reg_grp == IQS7211_REG_GRP_TP &&
+                           gesture_flags & dev_desc->kp_events[i].mask)
+                               input_report_key(iqs7211->kp_idev,
+                                                iqs7211->kp_code[i], 0);
+       }
+
+       input_sync(iqs7211->kp_idev);
+
+       return 0;
+}
+
+/*
+ * Interrupt handler: reports pending events for the device passed as
+ * context. Returns IRQ_HANDLED if iqs7211_report() succeeded and IRQ_NONE
+ * if it returned an error.
+ */
+static irqreturn_t iqs7211_irq(int irq, void *context)
+{
+       struct iqs7211_private *iqs7211 = context;
+
+       return iqs7211_report(iqs7211) ? IRQ_NONE : IRQ_HANDLED;
+}
+
+/*
+ * Suspend hook: writes dev_desc->suspend to the sys_ctrl register.
+ *
+ * Does nothing and returns 0 if the device descriptor defines no suspend
+ * value or if device_may_wakeup() is true for the device. Otherwise returns
+ * the result of the register write.
+ */
+static int iqs7211_suspend(struct device *dev)
+{
+       struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       int error;
+
+       if (!dev_desc->suspend || device_may_wakeup(dev))
+               return 0;
+
+       /*
+        * I2C communication prompts the device to assert its RDY pin if it is
+        * not already asserted. As such, the interrupt must be disabled so as
+        * to prevent reentrant interrupts.
+        */
+       disable_irq(gpiod_to_irq(iqs7211->irq_gpio));
+
+       error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl,
+                                  dev_desc->suspend);
+
+       /* Re-enabled whether or not the write succeeded. */
+       enable_irq(gpiod_to_irq(iqs7211->irq_gpio));
+
+       return error;
+}
+
+/*
+ * Resume hook: writes two words starting at the sys_ctrl register, a zero
+ * word followed by the current event mask.
+ *
+ * Skipped (returning 0) under the same conditions as suspend: no suspend
+ * value in the device descriptor, or device_may_wakeup() is true. Otherwise
+ * returns the result of the burst write.
+ */
+static int iqs7211_resume(struct device *dev)
+{
+       struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+       const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+       __le16 sys_ctrl[] = {
+               0,
+               cpu_to_le16(iqs7211->event_mask),
+       };
+       int error;
+
+       if (!dev_desc->suspend || device_may_wakeup(dev))
+               return 0;
+
+       /* The interrupt is masked for the duration of the I2C write. */
+       disable_irq(gpiod_to_irq(iqs7211->irq_gpio));
+
+       /*
+        * Forced communication, if in use, must be explicitly enabled as part
+        * of the wake-up command.
+        */
+       error = iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, sys_ctrl,
+                                   sizeof(sys_ctrl));
+
+       enable_irq(gpiod_to_irq(iqs7211->irq_gpio));
+
+       return error;
+}
+
+/* PM operations built from iqs7211_suspend() and iqs7211_resume(). */
+static DEFINE_SIMPLE_DEV_PM_OPS(iqs7211_pm, iqs7211_suspend, iqs7211_resume);
+
+/*
+ * Show handler for the "fw_info" device attribute.
+ *
+ * Formats the cached version information as
+ * "<prod_num>.<patch>.<major>.<minor>:<exp_file[1]>.<exp_file[0]>" followed
+ * by a newline, and returns the number of characters written to buf.
+ */
+static ssize_t fw_info_show(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+
+       return scnprintf(buf, PAGE_SIZE, "%u.%u.%u.%u:%u.%u\n",
+                        le16_to_cpu(iqs7211->ver_info.prod_num),
+                        le32_to_cpu(iqs7211->ver_info.patch),
+                        le16_to_cpu(iqs7211->ver_info.major),
+                        le16_to_cpu(iqs7211->ver_info.minor),
+                        iqs7211->exp_file[1], iqs7211->exp_file[0]);
+}
+
+/* Read-only "fw_info" attribute, backed by fw_info_show(). */
+static DEVICE_ATTR_RO(fw_info);
+
+/* NULL-terminated attribute list from which iqs7211_groups is generated. */
+static struct attribute *iqs7211_attrs[] = {
+       &dev_attr_fw_info.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(iqs7211);
+
+/*
+ * Device tree match table. Each compatible string carries the corresponding
+ * iqs7211_devs[] entry as match data; the empty entry terminates the table.
+ */
+static const struct of_device_id iqs7211_of_match[] = {
+       {
+               .compatible = "azoteq,iqs7210a",
+               .data = &iqs7211_devs[IQS7210A],
+       },
+       {
+               .compatible = "azoteq,iqs7211a",
+               .data = &iqs7211_devs[IQS7211A],
+       },
+       {
+               .compatible = "azoteq,iqs7211e",
+               .data = &iqs7211_devs[IQS7211E],
+       },
+       { }
+};
+MODULE_DEVICE_TABLE(of, iqs7211_of_match);
+
+/*
+ * Probe an I2C client: acquire the RDY (and optional reset) GPIO, start
+ * communication, parse the firmware node of each register group, register
+ * the keypad and touch input devices, initialize the device, and finally
+ * request the interrupt derived from the RDY GPIO.
+ *
+ * The allocations made directly in this function are device-managed
+ * (devm_*), so its error paths return without explicit cleanup. Returns 0 on
+ * success or a negative error code from the first step that fails.
+ */
+static int iqs7211_probe(struct i2c_client *client)
+{
+       struct iqs7211_private *iqs7211;
+       enum iqs7211_reg_grp_id reg_grp;
+       unsigned long irq_flags;
+       bool shared_irq;
+       int error, irq;
+
+       iqs7211 = devm_kzalloc(&client->dev, sizeof(*iqs7211), GFP_KERNEL);
+       if (!iqs7211)
+               return -ENOMEM;
+
+       i2c_set_clientdata(client, iqs7211);
+       iqs7211->client = client;
+
+       INIT_LIST_HEAD(&iqs7211->reg_field_head);
+
+       iqs7211->dev_desc = device_get_match_data(&client->dev);
+       if (!iqs7211->dev_desc)
+               return -ENODEV;
+
+       /* Variants with IQS7211_MAX_CTX channels use the combined pin described below. */
+       shared_irq = iqs7211->dev_desc->num_ctx == IQS7211_MAX_CTX;
+
+       /*
+        * The RDY pin behaves as an interrupt, but must also be polled ahead
+        * of unsolicited I2C communication. As such, it is first opened as a
+        * GPIO and then passed to gpiod_to_irq() to register the interrupt.
+        *
+        * If an extra CTx pin is present, the RDY and MCLR pins are combined
+        * into a single bidirectional pin. In that case, the platform's GPIO
+        * must be configured as an open-drain output.
+        */
+       iqs7211->irq_gpio = devm_gpiod_get(&client->dev, "irq",
+                                          shared_irq ? GPIOD_OUT_LOW
+                                                     : GPIOD_IN);
+       if (IS_ERR(iqs7211->irq_gpio)) {
+               error = PTR_ERR(iqs7211->irq_gpio);
+               dev_err(&client->dev, "Failed to request IRQ GPIO: %d\n",
+                       error);
+               return error;
+       }
+
+       /* With a combined pin the same descriptor doubles as the reset line. */
+       if (shared_irq) {
+               iqs7211->reset_gpio = iqs7211->irq_gpio;
+       } else {
+               iqs7211->reset_gpio = devm_gpiod_get_optional(&client->dev,
+                                                             "reset",
+                                                             GPIOD_OUT_HIGH);
+               if (IS_ERR(iqs7211->reset_gpio)) {
+                       error = PTR_ERR(iqs7211->reset_gpio);
+                       dev_err(&client->dev,
+                               "Failed to request reset GPIO: %d\n", error);
+                       return error;
+               }
+       }
+
+       error = iqs7211_start_comms(iqs7211);
+       if (error)
+               return error;
+
+       /*
+        * A register group with a name is described by the child node of that
+        * name; a group without a name is parsed from the device's own node.
+        */
+       for (reg_grp = 0; reg_grp < IQS7211_NUM_REG_GRPS; reg_grp++) {
+               const char *reg_grp_name = iqs7211_reg_grp_names[reg_grp];
+               struct fwnode_handle *reg_grp_node;
+
+               if (reg_grp_name)
+                       reg_grp_node = device_get_named_child_node(&client->dev,
+                                                                  reg_grp_name);
+               else
+                       reg_grp_node = fwnode_handle_get(dev_fwnode(&client->dev));
+
+               /* Groups without a node are simply skipped. */
+               if (!reg_grp_node)
+                       continue;
+
+               /* The node reference is dropped before the result is checked. */
+               error = iqs7211_parse_reg_grp(iqs7211, reg_grp_node, reg_grp);
+               fwnode_handle_put(reg_grp_node);
+               if (error)
+                       return error;
+       }
+
+       error = iqs7211_register_kp(iqs7211);
+       if (error)
+               return error;
+
+       error = iqs7211_register_tp(iqs7211);
+       if (error)
+               return error;
+
+       error = iqs7211_init_device(iqs7211);
+       if (error)
+               return error;
+
+       irq = gpiod_to_irq(iqs7211->irq_gpio);
+       if (irq < 0)
+               return irq;
+
+       /* Level-triggered; the polarity follows the GPIO's active-low setting. */
+       irq_flags = gpiod_is_active_low(iqs7211->irq_gpio) ? IRQF_TRIGGER_LOW
+                                                          : IRQF_TRIGGER_HIGH;
+       irq_flags |= IRQF_ONESHOT;
+
+       /* No primary handler is passed; iqs7211_irq is the thread function. */
+       error = devm_request_threaded_irq(&client->dev, irq, NULL, iqs7211_irq,
+                                         irq_flags, client->name, iqs7211);
+       if (error)
+               dev_err(&client->dev, "Failed to request IRQ: %d\n", error);
+
+       return error;
+}
+
+/*
+ * I2C driver definition: ties together the probe routine, the device tree
+ * match table, the sysfs attribute groups and the PM operations.
+ */
+static struct i2c_driver iqs7211_i2c_driver = {
+       .probe = iqs7211_probe,
+       .driver = {
+               .name = "iqs7211",
+               .of_match_table = iqs7211_of_match,
+               .dev_groups = iqs7211_groups,
+               .pm = pm_sleep_ptr(&iqs7211_pm),
+       },
+};
+module_i2c_driver(iqs7211_i2c_driver);
+
+MODULE_AUTHOR("Jeff LaBundy <jeff@labundy.com>");
+MODULE_DESCRIPTION("Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller");
+MODULE_LICENSE("GPL");
index 15b5cb7..9bad8b9 100644 (file)
@@ -198,54 +198,36 @@ static void lpc32xx_ts_close(struct input_dev *dev)
 
 static int lpc32xx_ts_probe(struct platform_device *pdev)
 {
+       struct device *dev = &pdev->dev;
        struct lpc32xx_tsc *tsc;
        struct input_dev *input;
-       struct resource *res;
-       resource_size_t size;
        int irq;
        int error;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "Can't get memory resource\n");
-               return -ENOENT;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;
 
-       tsc = kzalloc(sizeof(*tsc), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!tsc || !input) {
-               dev_err(&pdev->dev, "failed allocating memory\n");
-               error = -ENOMEM;
-               goto err_free_mem;
-       }
+       tsc = devm_kzalloc(dev, sizeof(*tsc), GFP_KERNEL);
+       if (!tsc)
+               return -ENOMEM;
 
-       tsc->dev = input;
        tsc->irq = irq;
 
-       size = resource_size(res);
-
-       if (!request_mem_region(res->start, size, pdev->name)) {
-               dev_err(&pdev->dev, "TSC registers are not free\n");
-               error = -EBUSY;
-               goto err_free_mem;
-       }
+       tsc->tsc_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(tsc->tsc_base))
+               return PTR_ERR(tsc->tsc_base);
 
-       tsc->tsc_base = ioremap(res->start, size);
-       if (!tsc->tsc_base) {
-               dev_err(&pdev->dev, "Can't map memory\n");
-               error = -ENOMEM;
-               goto err_release_mem;
-       }
-
-       tsc->clk = clk_get(&pdev->dev, NULL);
+       tsc->clk = devm_clk_get(dev, NULL);
        if (IS_ERR(tsc->clk)) {
                dev_err(&pdev->dev, "failed getting clock\n");
-               error = PTR_ERR(tsc->clk);
-               goto err_unmap;
+               return PTR_ERR(tsc->clk);
+       }
+
+       input = devm_input_allocate_device(dev);
+       if (!input) {
+               dev_err(&pdev->dev, "failed allocating input device\n");
+               return -ENOMEM;
        }
 
        input->name = MOD_NAME;
@@ -254,68 +236,33 @@ static int lpc32xx_ts_probe(struct platform_device *pdev)
        input->id.vendor = 0x0001;
        input->id.product = 0x0002;
        input->id.version = 0x0100;
-       input->dev.parent = &pdev->dev;
        input->open = lpc32xx_ts_open;
        input->close = lpc32xx_ts_close;
 
-       input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
-       input->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+       input_set_capability(input, EV_KEY, BTN_TOUCH);
        input_set_abs_params(input, ABS_X, LPC32XX_TSC_MIN_XY_VAL,
                             LPC32XX_TSC_MAX_XY_VAL, 0, 0);
        input_set_abs_params(input, ABS_Y, LPC32XX_TSC_MIN_XY_VAL,
                             LPC32XX_TSC_MAX_XY_VAL, 0, 0);
 
        input_set_drvdata(input, tsc);
+       tsc->dev = input;
 
-       error = request_irq(tsc->irq, lpc32xx_ts_interrupt,
-                           0, pdev->name, tsc);
+       error = devm_request_irq(dev, tsc->irq, lpc32xx_ts_interrupt,
+                                0, pdev->name, tsc);
        if (error) {
                dev_err(&pdev->dev, "failed requesting interrupt\n");
-               goto err_put_clock;
+               return error;
        }
 
        error = input_register_device(input);
        if (error) {
                dev_err(&pdev->dev, "failed registering input device\n");
-               goto err_free_irq;
+               return error;
        }
 
        platform_set_drvdata(pdev, tsc);
-       device_init_wakeup(&pdev->dev, 1);
-
-       return 0;
-
-err_free_irq:
-       free_irq(tsc->irq, tsc);
-err_put_clock:
-       clk_put(tsc->clk);
-err_unmap:
-       iounmap(tsc->tsc_base);
-err_release_mem:
-       release_mem_region(res->start, size);
-err_free_mem:
-       input_free_device(input);
-       kfree(tsc);
-
-       return error;
-}
-
-static int lpc32xx_ts_remove(struct platform_device *pdev)
-{
-       struct lpc32xx_tsc *tsc = platform_get_drvdata(pdev);
-       struct resource *res;
-
-       free_irq(tsc->irq, tsc);
-
-       input_unregister_device(tsc->dev);
-
-       clk_put(tsc->clk);
-
-       iounmap(tsc->tsc_base);
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       release_mem_region(res->start, resource_size(res));
-
-       kfree(tsc);
+       device_init_wakeup(&pdev->dev, true);
 
        return 0;
 }
@@ -384,7 +331,6 @@ MODULE_DEVICE_TABLE(of, lpc32xx_tsc_of_match);
 
 static struct platform_driver lpc32xx_ts_driver = {
        .probe          = lpc32xx_ts_probe,
-       .remove         = lpc32xx_ts_remove,
        .driver         = {
                .name   = MOD_NAME,
                .pm     = LPC32XX_TS_PM_OPS,
index 32896e5..2ac4483 100644 (file)
@@ -1451,13 +1451,8 @@ static int mip4_probe(struct i2c_client *client)
 
        ts->gpio_ce = devm_gpiod_get_optional(&client->dev,
                                              "ce", GPIOD_OUT_LOW);
-       if (IS_ERR(ts->gpio_ce)) {
-               error = PTR_ERR(ts->gpio_ce);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->gpio_ce))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->gpio_ce), "Failed to get gpio\n");
 
        error = mip4_power_on(ts);
        if (error)
index ac12494..af233b6 100644 (file)
@@ -7,7 +7,6 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/i2c.h>
 #include <linux/input/mt.h>
 #include <linux/input/touchscreen.h>
@@ -43,6 +42,7 @@
 /* Touchscreen absolute values */
 #define MMS114_MAX_AREA                        0xff
 
+#define MMS114_MAX_TOUCHKEYS           15
 #define MMS114_MAX_TOUCH               10
 #define MMS114_EVENT_SIZE              8
 #define MMS136_EVENT_SIZE              6
@@ -70,6 +70,9 @@ struct mms114_data {
        unsigned int            contact_threshold;
        unsigned int            moving_threshold;
 
+       u32 keycodes[MMS114_MAX_TOUCHKEYS];
+       int num_keycodes;
+
        /* Use cache data for mode control register(write only) */
        u8                      cache_mode_control;
 };
@@ -167,11 +170,6 @@ static void mms114_process_mt(struct mms114_data *data, struct mms114_touch *tou
                return;
        }
 
-       if (touch->type != MMS114_TYPE_TOUCHSCREEN) {
-               dev_err(&client->dev, "Wrong touch type (%d)\n", touch->type);
-               return;
-       }
-
        id = touch->id - 1;
        x = touch->x_lo | touch->x_hi << 8;
        y = touch->y_lo | touch->y_hi << 8;
@@ -191,9 +189,33 @@ static void mms114_process_mt(struct mms114_data *data, struct mms114_touch *tou
        }
 }
 
+/*
+ * Report a single touchkey event.
+ *
+ * Touch ids are 1-based here: id N is reported with data->keycodes[N - 1].
+ * An id of 0 is ignored silently, and an id above data->num_keycodes is
+ * logged as an error and dropped.
+ */
+static void mms114_process_touchkey(struct mms114_data *data,
+                                   struct mms114_touch *touch)
+{
+       struct i2c_client *client = data->client;
+       struct input_dev *input_dev = data->input_dev;
+       unsigned int keycode_id;
+
+       if (touch->id == 0)
+               return;
+
+       if (touch->id > data->num_keycodes) {
+               dev_err(&client->dev, "Wrong touch id for touchkey (%d)\n",
+                       touch->id);
+               return;
+       }
+
+       /* Convert the 1-based id into an index into keycodes[]. */
+       keycode_id = touch->id - 1;
+       dev_dbg(&client->dev, "keycode id: %d, pressed: %d\n", keycode_id,
+               touch->pressed);
+
+       input_report_key(input_dev, data->keycodes[keycode_id], touch->pressed);
+}
+
 static irqreturn_t mms114_interrupt(int irq, void *dev_id)
 {
        struct mms114_data *data = dev_id;
+       struct i2c_client *client = data->client;
        struct input_dev *input_dev = data->input_dev;
        struct mms114_touch touch[MMS114_MAX_TOUCH];
        int packet_size;
@@ -223,8 +245,22 @@ static irqreturn_t mms114_interrupt(int irq, void *dev_id)
        if (error < 0)
                goto out;
 
-       for (index = 0; index < touch_size; index++)
-               mms114_process_mt(data, touch + index);
+       for (index = 0; index < touch_size; index++) {
+               switch (touch[index].type) {
+               case MMS114_TYPE_TOUCHSCREEN:
+                       mms114_process_mt(data, touch + index);
+                       break;
+
+               case MMS114_TYPE_TOUCHKEY:
+                       mms114_process_touchkey(data, touch + index);
+                       break;
+
+               default:
+                       dev_err(&client->dev, "Wrong touch type (%d)\n",
+                               touch[index].type);
+                       break;
+               }
+       }
 
        input_mt_report_pointer_emulation(data->input_dev, true);
        input_sync(data->input_dev);
@@ -446,6 +482,7 @@ static int mms114_probe(struct i2c_client *client)
        struct input_dev *input_dev;
        const void *match_data;
        int error;
+       int i;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
                dev_err(&client->dev, "Not supported I2C adapter\n");
@@ -469,6 +506,42 @@ static int mms114_probe(struct i2c_client *client)
 
        data->type = (enum mms_type)match_data;
 
+       data->num_keycodes = device_property_count_u32(&client->dev,
+                                                      "linux,keycodes");
+       if (data->num_keycodes == -EINVAL) {
+               data->num_keycodes = 0;
+       } else if (data->num_keycodes < 0) {
+               dev_err(&client->dev,
+                       "Unable to parse linux,keycodes property: %d\n",
+                       data->num_keycodes);
+               return data->num_keycodes;
+       } else if (data->num_keycodes > MMS114_MAX_TOUCHKEYS) {
+               dev_warn(&client->dev,
+                       "Found %d linux,keycodes but max is %d, ignoring the rest\n",
+                        data->num_keycodes, MMS114_MAX_TOUCHKEYS);
+               data->num_keycodes = MMS114_MAX_TOUCHKEYS;
+       }
+
+       if (data->num_keycodes > 0) {
+               error = device_property_read_u32_array(&client->dev,
+                                                      "linux,keycodes",
+                                                      data->keycodes,
+                                                      data->num_keycodes);
+               if (error) {
+                       dev_err(&client->dev,
+                               "Unable to read linux,keycodes values: %d\n",
+                               error);
+                       return error;
+               }
+
+               input_dev->keycode = data->keycodes;
+               input_dev->keycodemax = data->num_keycodes;
+               input_dev->keycodesize = sizeof(data->keycodes[0]);
+               for (i = 0; i < data->num_keycodes; i++)
+                       input_set_capability(input_dev,
+                                            EV_KEY, data->keycodes[i]);
+       }
+
        input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X);
        input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y);
        input_set_abs_params(input_dev, ABS_MT_PRESSURE, 0, 255, 0, 0);
index 7f7d879..1a797e4 100644 (file)
@@ -1,9 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Driver for Novatek i2c touchscreen controller as found on
- * the Acer Iconia One 7 B1-750 tablet. The Touchscreen controller
- * model-number is unknown. Android calls this a "NVT-ts" touchscreen,
- * but that may apply to other Novatek controller models too.
+ * Driver for Novatek NT11205 i2c touchscreen controller as found
+ * on the Acer Iconia One 7 B1-750 tablet.
  *
  * Copyright (c) 2023 Hans de Goede <hdegoede@redhat.com>
  */
@@ -272,7 +270,7 @@ static int nvt_ts_probe(struct i2c_client *client)
 
        error = input_register_device(input);
        if (error) {
-               dev_err(dev, "failed to request irq: %d\n", error);
+               dev_err(dev, "failed to register input device: %d\n", error);
                return error;
        }
 
@@ -296,6 +294,6 @@ static struct i2c_driver nvt_ts_driver = {
 
 module_i2c_driver(nvt_ts_driver);
 
-MODULE_DESCRIPTION("Novatek NVT-ts touchscreen driver");
+MODULE_DESCRIPTION("Novatek NT11205 touchscreen driver");
 MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
 MODULE_LICENSE("GPL");
index 554e179..4ede068 100644 (file)
@@ -13,8 +13,8 @@
 #include <linux/input/mt.h>
 #include <linux/input/touchscreen.h>
 #include <linux/interrupt.h>
-#include <linux/of_device.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/slab.h>
 
 #define PIXCIR_MAX_SLOTS       5 /* Max fingers supported by driver */
@@ -515,41 +515,27 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client)
        input_set_drvdata(input, tsdata);
 
        tsdata->gpio_attb = devm_gpiod_get(dev, "attb", GPIOD_IN);
-       if (IS_ERR(tsdata->gpio_attb)) {
-               error = PTR_ERR(tsdata->gpio_attb);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to request ATTB gpio: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(tsdata->gpio_attb))
+               return dev_err_probe(dev, PTR_ERR(tsdata->gpio_attb),
+                                    "Failed to request ATTB gpio\n");
 
        tsdata->gpio_reset = devm_gpiod_get_optional(dev, "reset",
                                                     GPIOD_OUT_LOW);
-       if (IS_ERR(tsdata->gpio_reset)) {
-               error = PTR_ERR(tsdata->gpio_reset);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to request RESET gpio: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(tsdata->gpio_reset))
+               return dev_err_probe(dev, PTR_ERR(tsdata->gpio_reset),
+                                    "Failed to request RESET gpio\n");
 
        tsdata->gpio_wake = devm_gpiod_get_optional(dev, "wake",
                                                    GPIOD_OUT_HIGH);
-       if (IS_ERR(tsdata->gpio_wake)) {
-               error = PTR_ERR(tsdata->gpio_wake);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get wake gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(tsdata->gpio_wake))
+               return dev_err_probe(dev, PTR_ERR(tsdata->gpio_wake),
+                                    "Failed to get wake gpio\n");
 
        tsdata->gpio_enable = devm_gpiod_get_optional(dev, "enable",
                                                      GPIOD_OUT_HIGH);
-       if (IS_ERR(tsdata->gpio_enable)) {
-               error = PTR_ERR(tsdata->gpio_enable);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "Failed to get enable gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(tsdata->gpio_enable))
+               return dev_err_probe(dev, PTR_ERR(tsdata->gpio_enable),
+                                    "Failed to get enable gpio\n");
 
        if (tsdata->gpio_enable)
                msleep(100);
index 76e7d62..78dd305 100644 (file)
@@ -1087,32 +1087,20 @@ static int raydium_i2c_probe(struct i2c_client *client)
        i2c_set_clientdata(client, ts);
 
        ts->avdd = devm_regulator_get(&client->dev, "avdd");
-       if (IS_ERR(ts->avdd)) {
-               error = PTR_ERR(ts->avdd);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get 'avdd' regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->avdd))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->avdd),
+                                    "Failed to get 'avdd' regulator\n");
 
        ts->vccio = devm_regulator_get(&client->dev, "vccio");
-       if (IS_ERR(ts->vccio)) {
-               error = PTR_ERR(ts->vccio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get 'vccio' regulator: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->vccio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->vccio),
+                                    "Failed to get 'vccio' regulator\n");
 
        ts->reset_gpio = devm_gpiod_get_optional(&client->dev, "reset",
                                                 GPIOD_OUT_LOW);
-       if (IS_ERR(ts->reset_gpio)) {
-               error = PTR_ERR(ts->reset_gpio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "failed to get reset gpio: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->reset_gpio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->reset_gpio),
+                                    "Failed to get reset gpio\n");
 
        error = raydium_i2c_power_on(ts);
        if (error)
index 6f754a8..7e761ec 100644 (file)
@@ -210,12 +210,8 @@ static int grts_probe(struct platform_device *pdev)
 
        /* get the channels from IIO device */
        st->iio_chans = devm_iio_channel_get_all(dev);
-       if (IS_ERR(st->iio_chans)) {
-               error = PTR_ERR(st->iio_chans);
-               if (error != -EPROBE_DEFER)
-                       dev_err(dev, "can't get iio channels.\n");
-               return error;
-       }
+       if (IS_ERR(st->iio_chans))
+               return dev_err_probe(dev, PTR_ERR(st->iio_chans), "can't get iio channels\n");
 
        if (!device_property_present(dev, "io-channel-names"))
                return -ENODEV;
index 9e28f96..62f562a 100644 (file)
@@ -706,11 +706,9 @@ static int silead_ts_probe(struct i2c_client *client)
 
        /* Power GPIO pin */
        data->gpio_power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
-       if (IS_ERR(data->gpio_power)) {
-               if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER)
-                       dev_err(dev, "Shutdown GPIO request failed\n");
-               return PTR_ERR(data->gpio_power);
-       }
+       if (IS_ERR(data->gpio_power))
+               return dev_err_probe(dev, PTR_ERR(data->gpio_power),
+                                    "Shutdown GPIO request failed\n");
 
        error = silead_ts_setup(client);
        if (error)
index 426564d..ed56cb5 100644 (file)
@@ -310,23 +310,15 @@ static int sis_ts_probe(struct i2c_client *client)
 
        ts->attn_gpio = devm_gpiod_get_optional(&client->dev,
                                                "attn", GPIOD_IN);
-       if (IS_ERR(ts->attn_gpio)) {
-               error = PTR_ERR(ts->attn_gpio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get attention GPIO: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->attn_gpio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->attn_gpio),
+                                    "Failed to get attention GPIO\n");
 
        ts->reset_gpio = devm_gpiod_get_optional(&client->dev,
                                                 "reset", GPIOD_OUT_LOW);
-       if (IS_ERR(ts->reset_gpio)) {
-               error = PTR_ERR(ts->reset_gpio);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev,
-                               "Failed to get reset GPIO: %d\n", error);
-               return error;
-       }
+       if (IS_ERR(ts->reset_gpio))
+               return dev_err_probe(&client->dev, PTR_ERR(ts->reset_gpio),
+                                    "Failed to get reset GPIO\n");
 
        sis_ts_reset(ts);
 
index 31d1402..7efbcd0 100644 (file)
@@ -221,7 +221,6 @@ static void surface3_spi_power(struct surface3_ts_data *data, bool on)
  */
 static int surface3_spi_get_gpio_config(struct surface3_ts_data *data)
 {
-       int error;
        struct device *dev;
        struct gpio_desc *gpiod;
        int i;
@@ -231,15 +230,9 @@ static int surface3_spi_get_gpio_config(struct surface3_ts_data *data)
        /* Get the reset lines GPIO pin number */
        for (i = 0; i < 2; i++) {
                gpiod = devm_gpiod_get_index(dev, NULL, i, GPIOD_OUT_LOW);
-               if (IS_ERR(gpiod)) {
-                       error = PTR_ERR(gpiod);
-                       if (error != -EPROBE_DEFER)
-                               dev_err(dev,
-                                       "Failed to get power GPIO %d: %d\n",
-                                       i,
-                                       error);
-                       return error;
-               }
+               if (IS_ERR(gpiod))
+                       return dev_err_probe(dev, PTR_ERR(gpiod),
+                                            "Failed to get power GPIO %d\n", i);
 
                data->gpiod_rst[i] = gpiod;
        }
index 0293c49..f5c5881 100644 (file)
@@ -323,13 +323,9 @@ static int sx8654_probe(struct i2c_client *client)
 
        sx8654->gpio_reset = devm_gpiod_get_optional(&client->dev, "reset",
                                                     GPIOD_OUT_HIGH);
-       if (IS_ERR(sx8654->gpio_reset)) {
-               error = PTR_ERR(sx8654->gpio_reset);
-               if (error != -EPROBE_DEFER)
-                       dev_err(&client->dev, "unable to get reset-gpio: %d\n",
-                               error);
-               return error;
-       }
+       if (IS_ERR(sx8654->gpio_reset))
+               return dev_err_probe(&client->dev, PTR_ERR(sx8654->gpio_reset),
+                                    "unable to get reset-gpio\n");
        dev_dbg(&client->dev, "got GPIO reset pin\n");
 
        sx8654->data = device_get_match_data(&client->dev);
index decf2d2..9aa4e35 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/sort.h>
 #include <linux/pm_wakeirq.h>
 
index 6046dfe..b92208e 100644 (file)
@@ -521,6 +521,15 @@ config LEDS_PCA963X
          LED driver chip accessed via the I2C bus. Supported
          devices include PCA9633 and PCA9634
 
+config LEDS_PCA995X
+       tristate "LED Support for PCA995x I2C chips"
+       depends on LEDS_CLASS
+       depends on I2C
+       help
+         This option enables support for LEDs connected to PCA995x
+         LED driver chips accessed via the I2C bus. Supported
+         devices include PCA9955BTW, PCA9952TW and PCA9955TW.
+
 config LEDS_WM831X_STATUS
        tristate "LED support for status LEDs on WM831x PMICs"
        depends on LEDS_CLASS
index d71f122..d7348e8 100644 (file)
@@ -72,6 +72,7 @@ obj-$(CONFIG_LEDS_OT200)              += leds-ot200.o
 obj-$(CONFIG_LEDS_PCA9532)             += leds-pca9532.o
 obj-$(CONFIG_LEDS_PCA955X)             += leds-pca955x.o
 obj-$(CONFIG_LEDS_PCA963X)             += leds-pca963x.o
+obj-$(CONFIG_LEDS_PCA995X)             += leds-pca995x.o
 obj-$(CONFIG_LEDS_PM8058)              += leds-pm8058.o
 obj-$(CONFIG_LEDS_POWERNV)             += leds-powernv.o
 obj-$(CONFIG_LEDS_PWM)                 += leds-pwm.o
index 945c842..bdcb737 100644 (file)
@@ -1,10 +1,10 @@
 config LEDS_BCM63138
        tristate "LED Support for Broadcom BCM63138 SoC"
        depends on LEDS_CLASS
-       depends on ARCH_BCM4908 || ARCH_BCM_5301X || BCM63XX || COMPILE_TEST
+       depends on ARCH_BCMBCA || ARCH_BCM_5301X || BCM63XX || COMPILE_TEST
        depends on HAS_IOMEM
        depends on OF
-       default ARCH_BCM4908
+       default ARCH_BCMBCA
        help
          This option enables support for LED controller that is part of
          BCM63138 SoC. The same hardware block is known to be also used
index 4ed2efc..4e08dbc 100644 (file)
@@ -89,6 +89,8 @@ config LEDS_QCOM_FLASH
          the total LED current will be split symmetrically on each channel and
          they will be enabled/disabled at the same time.
 
+         This driver can be built as a module; it will be called "leds-qcom-flash".
+
 config LEDS_RT4505
        tristate "LED support for RT4505 flashlight controller"
        depends on I2C && OF
index b089ca1..a73d3ea 100644 (file)
@@ -309,6 +309,10 @@ static int qcom_flash_strobe_set(struct led_classdev_flash *fled_cdev, bool stat
        struct qcom_flash_led *led = flcdev_to_qcom_fled(fled_cdev);
        int rc;
 
+       rc = set_flash_strobe(led, SW_STROBE, false);
+       if (rc)
+               return rc;
+
        rc = set_flash_current(led, led->flash_current_ma, FLASH_MODE);
        if (rc)
                return rc;
@@ -745,6 +749,7 @@ static int qcom_flash_led_probe(struct platform_device *pdev)
        return 0;
 
 release:
+       fwnode_handle_put(child);
        while (flash_data->v4l2_flash[flash_data->leds_count] && flash_data->leds_count)
                v4l2_flash_release(flash_data->v4l2_flash[flash_data->leds_count--]);
        return rc;
index e317408..ec62a48 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/led-class-multicolor.h>
+#include <linux/math.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -19,9 +20,10 @@ int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
        int i;
 
        for (i = 0; i < mcled_cdev->num_colors; i++)
-               mcled_cdev->subled_info[i].brightness = brightness *
-                                       mcled_cdev->subled_info[i].intensity /
-                                       led_cdev->max_brightness;
+               mcled_cdev->subled_info[i].brightness =
+                       DIV_ROUND_CLOSEST(brightness *
+                                         mcled_cdev->subled_info[i].intensity,
+                                         led_cdev->max_brightness);
 
        return 0;
 }
index 6dae56b..974b84f 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/of.h>
 #include "leds.h"
 
-static struct class *leds_class;
 static DEFINE_MUTEX(leds_lookup_lock);
 static LIST_HEAD(leds_lookup_list);
 
@@ -76,6 +75,19 @@ static ssize_t max_brightness_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(max_brightness);
 
+/* sysfs "color" attribute: emit the LED's color name, or "invalid" if the id is out of range. */
+static ssize_t color_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct led_classdev *led_cdev = dev_get_drvdata(dev);
+
+       /* Ids at or above LED_COLOR_ID_MAX must not be used to index led_colors[]. */
+       if (led_cdev->color >= LED_COLOR_ID_MAX)
+               return sysfs_emit(buf, "%s\n", "invalid");
+
+       return sysfs_emit(buf, "%s\n", led_colors[led_cdev->color]);
+}
+static DEVICE_ATTR_RO(color);
+
 #ifdef CONFIG_LEDS_TRIGGERS
 static BIN_ATTR(trigger, 0644, led_trigger_read, led_trigger_write, 0);
 static struct bin_attribute *led_trigger_bin_attrs[] = {
@@ -90,6 +102,7 @@ static const struct attribute_group led_trigger_group = {
 static struct attribute *led_class_attrs[] = {
        &dev_attr_brightness.attr,
        &dev_attr_max_brightness.attr,
+       &dev_attr_color.attr,
        NULL,
 };
 
@@ -234,6 +247,12 @@ static struct led_classdev *led_module_get(struct device *led_dev)
        return led_cdev;
 }
 
+/*
+ * The "leds" device class. It is a static, const object handed to
+ * class_register()/class_unregister(), and it supplies the default
+ * attribute groups and PM ops for LED class devices.
+ */
+static const struct class leds_class = {
+       .name = "leds",
+       .dev_groups = led_groups,
+       .pm = &leds_class_dev_pm_ops,
+};
+
 /**
  * of_led_get() - request a LED device via the LED framework
  * @np: device node to get the LED device from
@@ -251,7 +270,7 @@ struct led_classdev *of_led_get(struct device_node *np, int index)
        if (!led_node)
                return ERR_PTR(-ENOENT);
 
-       led_dev = class_find_device_by_of_node(leds_class, led_node);
+       led_dev = class_find_device_by_of_node(&leds_class, led_node);
        of_node_put(led_node);
        put_device(led_dev);
 
@@ -346,7 +365,7 @@ struct led_classdev *led_get(struct device *dev, char *con_id)
        if (!provider)
                return ERR_PTR(-ENOENT);
 
-       led_dev = class_find_device_by_name(leds_class, provider);
+       led_dev = class_find_device_by_name(&leds_class, provider);
        kfree_const(provider);
 
        return led_module_get(led_dev);
@@ -402,6 +421,31 @@ void led_remove_lookup(struct led_lookup_data *led_lookup)
 }
 EXPORT_SYMBOL_GPL(led_remove_lookup);
 
+/**
+ * devm_of_led_get_optional - Resource-managed request of an optional LED device
+ * @dev:       LED consumer
+ * @index:     index of the LED to obtain in the consumer
+ *
+ * Thin wrapper around devm_of_led_get() for consumers that can work without
+ * the LED: a lookup that fails with -ENOENT is reported as NULL instead of
+ * as an error. Every other result is passed through unchanged.
+ *
+ * Return: a pointer to a LED device, NULL if the LED was not found, or
+ * ERR_PTR(errno) on any other failure.
+ */
+struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev,
+                                                       int index)
+{
+       struct led_classdev *led = devm_of_led_get(dev, index);
+
+       /* "Not found" is the one failure an optional lookup tolerates. */
+       if (led == ERR_PTR(-ENOENT))
+               return NULL;
+
+       return led;
+}
+EXPORT_SYMBOL_GPL(devm_of_led_get_optional);
+
 static int led_classdev_next_name(const char *init_name, char *name,
                                  size_t len)
 {
@@ -412,7 +456,7 @@ static int led_classdev_next_name(const char *init_name, char *name,
        strscpy(name, init_name, len);
 
        while ((ret < len) &&
-              (dev = class_find_device_by_name(leds_class, name))) {
+              (dev = class_find_device_by_name(&leds_class, name))) {
                put_device(dev);
                ret = snprintf(name, len, "%s_%u", init_name, ++i);
        }
@@ -457,6 +501,14 @@ int led_classdev_register_ext(struct device *parent,
                        if (fwnode_property_present(init_data->fwnode,
                                                    "retain-state-shutdown"))
                                led_cdev->flags |= LED_RETAIN_AT_SHUTDOWN;
+
+                       fwnode_property_read_u32(init_data->fwnode,
+                               "max-brightness",
+                               &led_cdev->max_brightness);
+
+                       if (fwnode_property_present(init_data->fwnode, "color"))
+                               fwnode_property_read_u32(init_data->fwnode, "color",
+                                                        &led_cdev->color);
                }
        } else {
                proposed_name = led_cdev->name;
@@ -466,10 +518,13 @@ int led_classdev_register_ext(struct device *parent,
        if (ret < 0)
                return ret;
 
+       if (led_cdev->color >= LED_COLOR_ID_MAX)
+               dev_warn(parent, "LED %s color identifier out of range\n", final_name);
+
        mutex_init(&led_cdev->led_access);
        mutex_lock(&led_cdev->led_access);
-       led_cdev->dev = device_create_with_groups(leds_class, parent, 0,
-                               led_cdev, led_cdev->groups, "%s", final_name);
+       led_cdev->dev = device_create_with_groups(&leds_class, parent, 0,
+                                                 led_cdev, led_cdev->groups, "%s", final_name);
        if (IS_ERR(led_cdev->dev)) {
                mutex_unlock(&led_cdev->led_access);
                return PTR_ERR(led_cdev->dev);
@@ -626,17 +681,12 @@ EXPORT_SYMBOL_GPL(devm_led_classdev_unregister);
 
 static int __init leds_init(void)
 {
-       leds_class = class_create("leds");
-       if (IS_ERR(leds_class))
-               return PTR_ERR(leds_class);
-       leds_class->pm = &leds_class_dev_pm_ops;
-       leds_class->dev_groups = led_groups;
-       return 0;
+       return class_register(&leds_class);
 }
 
 static void __exit leds_exit(void)
 {
-       class_destroy(leds_class);
+       class_unregister(&leds_class);
 }
 
 subsys_initcall(leds_init);
index b9b1295..04f9ea6 100644 (file)
@@ -474,15 +474,15 @@ int led_compose_name(struct device *dev, struct led_init_data *init_data,
        struct fwnode_handle *fwnode = init_data->fwnode;
        const char *devicename = init_data->devicename;
 
-       /* We want to label LEDs that can produce full range of colors
-        * as RGB, not multicolor */
-       BUG_ON(props.color == LED_COLOR_ID_MULTI);
-
        if (!led_classdev_name)
                return -EINVAL;
 
        led_parse_fwnode_props(dev, fwnode, &props);
 
+       /* We want to label LEDs that can produce full range of colors
+        * as RGB, not multicolor */
+       BUG_ON(props.color == LED_COLOR_ID_MULTI);
+
        if (props.label) {
                /*
                 * If init_data.devicename is NULL, then it indicates that
index 24b1041..0216afe 100644 (file)
@@ -344,7 +344,7 @@ MODULE_DEVICE_TABLE(i2c, an30259a_id);
 static struct i2c_driver an30259a_driver = {
        .driver = {
                .name = "leds-an30259a",
-               .of_match_table = of_match_ptr(an30259a_match_table),
+               .of_match_table = an30259a_match_table,
        },
        .probe = an30259a_probe,
        .remove = an30259a_remove,
index 49e1bdd..dd319c7 100644 (file)
@@ -7,8 +7,8 @@
 
 #include <linux/module.h>
 #include <linux/leds.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
-#include <linux/of_platform.h>
 
 enum ec_index {
        EC_BLUE_LED     = 0x01,
index 96979b8..691a743 100644 (file)
@@ -368,7 +368,7 @@ static int aw200xx_probe_fw(struct device *dev, struct aw200xx *chip)
 
        if (!chip->display_rows ||
            chip->display_rows > chip->cdef->display_size_rows_max) {
-               return dev_err_probe(dev, ret,
+               return dev_err_probe(dev, -EINVAL,
                                     "Invalid leds display size %u\n",
                                     chip->display_rows);
        }
@@ -583,7 +583,7 @@ static struct i2c_driver aw200xx_driver = {
                .name = "aw200xx",
                .of_match_table = aw200xx_match_table,
        },
-       .probe_new = aw200xx_probe,
+       .probe = aw200xx_probe,
        .remove = aw200xx_remove,
        .id_table = aw200xx_id,
 };
index 5976564..91f44b2 100644 (file)
@@ -62,7 +62,7 @@ struct aw2013_led {
 
 struct aw2013 {
        struct mutex mutex; /* held when writing to registers */
-       struct regulator *vcc_regulator;
+       struct regulator_bulk_data regulators[2];
        struct i2c_client *client;
        struct aw2013_led leds[AW2013_MAX_LEDS];
        struct regmap *regmap;
@@ -106,10 +106,11 @@ static void aw2013_chip_disable(struct aw2013 *chip)
 
        regmap_write(chip->regmap, AW2013_GCR, 0);
 
-       ret = regulator_disable(chip->vcc_regulator);
+       ret = regulator_bulk_disable(ARRAY_SIZE(chip->regulators),
+                                    chip->regulators);
        if (ret) {
                dev_err(&chip->client->dev,
-                       "Failed to disable regulator: %d\n", ret);
+                       "Failed to disable regulators: %d\n", ret);
                return;
        }
 
@@ -123,10 +124,11 @@ static int aw2013_chip_enable(struct aw2013 *chip)
        if (chip->enabled)
                return 0;
 
-       ret = regulator_enable(chip->vcc_regulator);
+       ret = regulator_bulk_enable(ARRAY_SIZE(chip->regulators),
+                                   chip->regulators);
        if (ret) {
                dev_err(&chip->client->dev,
-                       "Failed to enable regulator: %d\n", ret);
+                       "Failed to enable regulators: %d\n", ret);
                return ret;
        }
        chip->enabled = true;
@@ -348,19 +350,23 @@ static int aw2013_probe(struct i2c_client *client)
                goto error;
        }
 
-       chip->vcc_regulator = devm_regulator_get(&client->dev, "vcc");
-       ret = PTR_ERR_OR_ZERO(chip->vcc_regulator);
-       if (ret) {
+       chip->regulators[0].supply = "vcc";
+       chip->regulators[1].supply = "vio";
+       ret = devm_regulator_bulk_get(&client->dev,
+                                     ARRAY_SIZE(chip->regulators),
+                                     chip->regulators);
+       if (ret < 0) {
                if (ret != -EPROBE_DEFER)
                        dev_err(&client->dev,
-                               "Failed to request regulator: %d\n", ret);
+                               "Failed to request regulators: %d\n", ret);
                goto error;
        }
 
-       ret = regulator_enable(chip->vcc_regulator);
+       ret = regulator_bulk_enable(ARRAY_SIZE(chip->regulators),
+                                   chip->regulators);
        if (ret) {
                dev_err(&client->dev,
-                       "Failed to enable regulator: %d\n", ret);
+                       "Failed to enable regulators: %d\n", ret);
                goto error;
        }
 
@@ -382,10 +388,11 @@ static int aw2013_probe(struct i2c_client *client)
        if (ret < 0)
                goto error_reg;
 
-       ret = regulator_disable(chip->vcc_regulator);
+       ret = regulator_bulk_disable(ARRAY_SIZE(chip->regulators),
+                                    chip->regulators);
        if (ret) {
                dev_err(&client->dev,
-                       "Failed to disable regulator: %d\n", ret);
+                       "Failed to disable regulators: %d\n", ret);
                goto error;
        }
 
@@ -394,7 +401,8 @@ static int aw2013_probe(struct i2c_client *client)
        return 0;
 
 error_reg:
-       regulator_disable(chip->vcc_regulator);
+       regulator_bulk_disable(ARRAY_SIZE(chip->regulators),
+                              chip->regulators);
 
 error:
        mutex_destroy(&chip->mutex);
@@ -420,7 +428,7 @@ MODULE_DEVICE_TABLE(of, aw2013_match_table);
 static struct i2c_driver aw2013_driver = {
        .driver = {
                .name = "leds-aw2013",
-               .of_match_table = of_match_ptr(aw2013_match_table),
+               .of_match_table = aw2013_match_table,
        },
        .probe = aw2013_probe,
        .remove = aw2013_remove,
index 7d41ce8..87354f1 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/mfd/motorola-cpcap.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
index c87686b..b33bca3 100644 (file)
@@ -4,8 +4,8 @@
 
 #include <linux/delay.h>
 #include <linux/leds.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/spi/spi.h>
 #include <linux/workqueue.h>
 
index 1f952ba..2df24c3 100644 (file)
@@ -27,22 +27,16 @@ static void ip30led_set(struct led_classdev *led_cdev,
 
 static int ip30led_create(struct platform_device *pdev, int num)
 {
-       struct resource *res;
        struct ip30_led *data;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, num);
-       if (!res)
-               return -EBUSY;
-
        data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
-       data->reg = devm_ioremap_resource(&pdev->dev, res);
+       data->reg = devm_platform_ioremap_resource(pdev, num);
        if (IS_ERR(data->reg))
                return PTR_ERR(data->reg);
 
-
        switch (num) {
        case IP30_LED_SYSTEM:
                data->cdev.name = "white:power";
index 72cb56d..b0a0be7 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 /* Used to indicate a device has no such register */
 #define IS31FL32XX_REG_NONE 0xFF
index 030c040..2ef19ad 100644 (file)
@@ -594,18 +594,17 @@ static const struct i2c_device_id lp5521_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lp5521_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id of_lp5521_leds_match[] = {
        { .compatible = "national,lp5521", },
        {},
 };
 
 MODULE_DEVICE_TABLE(of, of_lp5521_leds_match);
-#endif
+
 static struct i2c_driver lp5521_driver = {
        .driver = {
                .name   = "lp5521",
-               .of_match_table = of_match_ptr(of_lp5521_leds_match),
+               .of_match_table = of_lp5521_leds_match,
        },
        .probe          = lp5521_probe,
        .remove         = lp5521_remove,
index daa6a16..38de853 100644 (file)
@@ -972,7 +972,6 @@ static const struct i2c_device_id lp5523_id[] = {
 
 MODULE_DEVICE_TABLE(i2c, lp5523_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id of_lp5523_leds_match[] = {
        { .compatible = "national,lp5523", },
        { .compatible = "ti,lp55231", },
@@ -980,12 +979,11 @@ static const struct of_device_id of_lp5523_leds_match[] = {
 };
 
 MODULE_DEVICE_TABLE(of, of_lp5523_leds_match);
-#endif
 
 static struct i2c_driver lp5523_driver = {
        .driver = {
                .name   = "lp5523x",
-               .of_match_table = of_match_ptr(of_lp5523_leds_match),
+               .of_match_table = of_lp5523_leds_match,
        },
        .probe          = lp5523_probe,
        .remove         = lp5523_remove,
index 4565cc1..39db9ae 100644 (file)
@@ -589,19 +589,17 @@ static const struct i2c_device_id lp5562_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lp5562_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id of_lp5562_leds_match[] = {
        { .compatible = "ti,lp5562", },
        {},
 };
 
 MODULE_DEVICE_TABLE(of, of_lp5562_leds_match);
-#endif
 
 static struct i2c_driver lp5562_driver = {
        .driver = {
                .name   = "lp5562",
-               .of_match_table = of_match_ptr(of_lp5562_leds_match),
+               .of_match_table = of_lp5562_leds_match,
        },
        .probe          = lp5562_probe,
        .remove         = lp5562_remove,
index f11886a..ac50aa8 100644 (file)
@@ -380,19 +380,17 @@ static const struct i2c_device_id lp8501_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, lp8501_id);
 
-#ifdef CONFIG_OF
 static const struct of_device_id of_lp8501_leds_match[] = {
        { .compatible = "ti,lp8501", },
        {},
 };
 
 MODULE_DEVICE_TABLE(of, of_lp8501_leds_match);
-#endif
 
 static struct i2c_driver lp8501_driver = {
        .driver = {
                .name   = "lp8501",
-               .of_match_table = of_match_ptr(of_lp8501_leds_match),
+               .of_match_table = of_lp8501_leds_match,
        },
        .probe          = lp8501_probe,
        .remove         = lp8501_remove,
index b7855c9..3921065 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/io.h>
 #include <linux/leds.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_data/mlxreg.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
index 1677d66..f3010c4 100644 (file)
@@ -247,7 +247,7 @@ static int ns2_led_probe(struct platform_device *pdev)
        if (!count)
                return -ENODEV;
 
-       leds = devm_kzalloc(dev, array_size(sizeof(*leds), count), GFP_KERNEL);
+       leds = devm_kcalloc(dev, count, sizeof(*leds), GFP_KERNEL);
        if (!leds)
                return -ENOMEM;
 
index 8b5c620..bf8bb8f 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/leds-pca9532.h>
 #include <linux/gpio/driver.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 /* m =  num_leds*/
 #define PCA9532_REG_INPUT(i)   ((i) >> 3)
diff --git a/drivers/leds/leds-pca995x.c b/drivers/leds/leds-pca995x.c
new file mode 100644 (file)
index 0000000..78215df
--- /dev/null
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LED driver for PCA995x I2C LED drivers
+ *
+ * Copyright 2011 bct electronic GmbH
+ * Copyright 2013 Qtechnology/AS
+ * Copyright 2022 NXP
+ * Copyright 2023 Marek Vasut
+ */
+
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+
+/* Register definition */
+#define PCA995X_MODE1                  0x00
+#define PCA995X_MODE2                  0x01
+#define PCA995X_LEDOUT0                        0x02
+#define PCA9955B_PWM0                  0x08
+#define PCA9952_PWM0                   0x0A
+#define PCA9952_IREFALL                        0x43
+#define PCA9955B_IREFALL               0x45
+
+/* Auto-increment disabled. Normal mode */
+#define PCA995X_MODE1_CFG              0x00
+
+/* LED select registers determine the source that drives LED outputs */
+#define PCA995X_LED_OFF                        0x0
+#define PCA995X_LED_ON                 0x1
+#define PCA995X_LED_PWM_MODE           0x2
+#define PCA995X_LDRX_MASK              0x3
+#define PCA995X_LDRX_BITS              2
+
+#define PCA995X_MAX_OUTPUTS            16
+#define PCA995X_OUTPUTS_PER_REG                4
+
+#define PCA995X_IREFALL_FULL_CFG       0xFF
+#define PCA995X_IREFALL_HALF_CFG       (PCA995X_IREFALL_FULL_CFG / 2)
+
+#define PCA995X_TYPE_NON_B             0
+#define PCA995X_TYPE_B                 1
+
+#define ldev_to_led(c) container_of(c, struct pca995x_led, ldev)
+
+/* State for a single LED output of the chip. */
+struct pca995x_led {
+       /* Output index; selects this LED's PWM register and its LEDOUT field. */
+       unsigned int led_no;
+       /* Embedded LED class device; ldev_to_led() maps it back to this struct. */
+       struct led_classdev ldev;
+       /* Owning chip, used to reach the regmap and the variant type. */
+       struct pca995x_chip *chip;
+};
+
+/* Driver state for one PCA995x chip. */
+struct pca995x_chip {
+       /* Register map used for every chip register access. */
+       struct regmap *regmap;
+       /* LED slots, indexed by output number (the child node's "reg" value). */
+       struct pca995x_led leds[PCA995X_MAX_OUTPUTS];
+       /* Variant from the match data; nonzero selects the PCA9955B register addresses. */
+       int btype;
+};
+
+/*
+ * Set the brightness of one output.
+ *
+ * LED_OFF and LED_FULL switch the output fully off/on through its LEDOUT
+ * field. Any other level is written to the output's individual PWM register
+ * and the output is then put into PWM mode.
+ *
+ * Returns the result of the regmap access (0 on success).
+ */
+static int pca995x_brightness_set(struct led_classdev *led_cdev,
+                                 enum led_brightness brightness)
+{
+       struct pca995x_led *led = ldev_to_led(led_cdev);
+       struct regmap *map = led->chip->regmap;
+       int lsb = PCA995X_LDRX_BITS * (led->led_no % PCA995X_OUTPUTS_PER_REG);
+       u8 ldr_reg = PCA995X_LEDOUT0 + (led->led_no / PCA995X_OUTPUTS_PER_REG);
+       u8 pwm_reg;
+       int err;
+
+       if (brightness == LED_OFF)
+               return regmap_update_bits(map, ldr_reg,
+                                         PCA995X_LDRX_MASK << lsb, 0);
+
+       if (brightness == LED_FULL)
+               return regmap_update_bits(map, ldr_reg,
+                                         PCA995X_LDRX_MASK << lsb,
+                                         PCA995X_LED_ON << lsb);
+
+       /* Intermediate level: program this output's individual PWM value. */
+       pwm_reg = (led->chip->btype ? PCA9955B_PWM0 : PCA9952_PWM0) + led->led_no;
+       err = regmap_write(map, pwm_reg, brightness);
+       if (err)
+               return err;
+
+       /*
+        * Switch the LDRx field to individual brightness via PWM.
+        * LED will stop blinking if it's doing so.
+        */
+       return regmap_update_bits(map, ldr_reg,
+                                 PCA995X_LDRX_MASK << lsb,
+                                 PCA995X_LED_PWM_MODE << lsb);
+}
+
+/* Register map layout: 8-bit register addresses, 8-bit values, addresses up to 0x49. */
+static const struct regmap_config pca995x_regmap = {
+       .reg_bits = 8,
+       .val_bits = 8,
+       .max_register = 0x49,
+};
+
+/**
+ * pca995x_probe() - set up a PCA995x chip and register its LED outputs
+ * @client: I2C client of the chip
+ *
+ * Each available child firmware node describes one output; its "reg"
+ * property is the output index. Every described output is registered as a
+ * LED class device, then MODE1 and the variant-specific IREFALL register
+ * are programmed.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int pca995x_probe(struct i2c_client *client)
+{
+       struct fwnode_handle *led_fwnodes[PCA995X_MAX_OUTPUTS] = { 0 };
+       struct fwnode_handle *np, *child;
+       struct device *dev = &client->dev;
+       struct pca995x_chip *chip;
+       struct pca995x_led *led;
+       int i, btype, ret;
+       u32 reg;
+
+       btype = (unsigned long)device_get_match_data(&client->dev);
+
+       np = dev_fwnode(dev);
+       if (!np)
+               return -ENODEV;
+
+       chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+       if (!chip)
+               return -ENOMEM;
+
+       chip->btype = btype;
+       chip->regmap = devm_regmap_init_i2c(client, &pca995x_regmap);
+       if (IS_ERR(chip->regmap))
+               return PTR_ERR(chip->regmap);
+
+       i2c_set_clientdata(client, chip);
+
+       fwnode_for_each_available_child_node(np, child) {
+               ret = fwnode_property_read_u32(child, "reg", &reg);
+               if (ret)
+                       goto err_put_child;
+
+               /* Reject out-of-range outputs and duplicate "reg" values. */
+               if (reg >= PCA995X_MAX_OUTPUTS || led_fwnodes[reg]) {
+                       ret = -EINVAL;
+                       goto err_put_child;
+               }
+
+               led = &chip->leds[reg];
+               /*
+                * The iterator releases its reference to the child when it
+                * advances, so take our own for the use after this loop.
+                */
+               led_fwnodes[reg] = fwnode_handle_get(child);
+               led->chip = chip;
+               led->led_no = reg;
+               led->ldev.brightness_set_blocking = pca995x_brightness_set;
+               led->ldev.max_brightness = 255;
+       }
+
+       for (i = 0; i < PCA995X_MAX_OUTPUTS; i++) {
+               struct led_init_data init_data = {};
+
+               if (!led_fwnodes[i])
+                       continue;
+
+               init_data.fwnode = led_fwnodes[i];
+
+               ret = devm_led_classdev_register_ext(dev,
+                                                    &chip->leds[i].ldev,
+                                                    &init_data);
+               if (ret < 0) {
+                       dev_err_probe(dev, ret,
+                                     "Could not register LED %s\n",
+                                     chip->leds[i].ldev.name);
+                       goto err_put_fwnodes;
+               }
+       }
+
+       /*
+        * NOTE(review): the references of successfully registered LEDs stay
+        * held from here on; confirm whether they should be dropped on unbind.
+        */
+
+       /* Disable LED all-call address and set normal mode */
+       ret = regmap_write(chip->regmap, PCA995X_MODE1, PCA995X_MODE1_CFG);
+       if (ret)
+               return ret;
+
+       /* IREF Output current value for all LEDn outputs */
+       return regmap_write(chip->regmap,
+                           btype ? PCA9955B_IREFALL : PCA9952_IREFALL,
+                           PCA995X_IREFALL_HALF_CFG);
+
+err_put_child:
+       /* Leaving the iterator early: drop the reference it still holds. */
+       fwnode_handle_put(child);
+       i = 0;
+err_put_fwnodes:
+       /*
+        * Release the nodes from index i on, i.e. those that were not
+        * attached to a successfully registered LED.
+        */
+       for (; i < PCA995X_MAX_OUTPUTS; i++)
+               if (led_fwnodes[i])
+                       fwnode_handle_put(led_fwnodes[i]);
+
+       return ret;
+}
+
+/*
+ * I2C id table; driver_data carries the PCA995X_TYPE_* variant.
+ * NOTE(review): presumably this is what device_get_match_data() returns in
+ * probe for non-OF matches - confirm.
+ */
+static const struct i2c_device_id pca995x_id[] = {
+       { "pca9952", .driver_data = (kernel_ulong_t)PCA995X_TYPE_NON_B },
+       { "pca9955b", .driver_data = (kernel_ulong_t)PCA995X_TYPE_B },
+       {}
+};
+MODULE_DEVICE_TABLE(i2c, pca995x_id);
+
+/* OF match table; .data carries the PCA995X_TYPE_* variant that probe stores in chip->btype. */
+static const struct of_device_id pca995x_of_match[] = {
+       { .compatible = "nxp,pca9952",  .data = (void *)PCA995X_TYPE_NON_B },
+       { .compatible = "nxp,pca9955b", .data = (void *)PCA995X_TYPE_B },
+       {},
+};
+MODULE_DEVICE_TABLE(of, pca995x_of_match);
+
+/*
+ * I2C driver definition. There is no .remove callback; probe obtains its
+ * memory, regmap and LED registrations through devm_* helpers.
+ */
+static struct i2c_driver pca995x_driver = {
+       .driver = {
+               .name = "leds-pca995x",
+               .of_match_table = pca995x_of_match,
+       },
+       .probe = pca995x_probe,
+       .id_table = pca995x_id,
+};
+module_i2c_driver(pca995x_driver);
+
+MODULE_AUTHOR("Isai Gaspar <isaiezequiel.gaspar@nxp.com>");
+MODULE_DESCRIPTION("PCA995x LED driver");
+MODULE_LICENSE("GPL");
index b9233f1..3f49a51 100644 (file)
@@ -4,7 +4,6 @@
 #include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/regmap.h>
index 29194cc..419b710 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/leds.h>
 #include <linux/err.h>
 #include <linux/pwm.h>
@@ -146,7 +146,7 @@ static int led_pwm_create_fwnode(struct device *dev, struct led_pwm_priv *priv)
                        led.name = to_of_node(fwnode)->name;
 
                if (!led.name) {
-                       ret = EINVAL;
+                       ret = -EINVAL;
                        goto err_child_out;
                }
 
index 2c7ffc3..9d91f21 100644 (file)
@@ -30,7 +30,7 @@
 
 #include <linux/leds.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/spi/spi.h>
 #include <linux/mutex.h>
 #include <uapi/linux/uleds.h>
index e38abb5..360a376 100644 (file)
@@ -7,8 +7,7 @@
  */
 #include <linux/io.h>
 #include <linux/init.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
index d7f10ad..b249166 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <linux/bitops.h>
 #include <linux/err.h>
-#include <linux/of_device.h>
+#include <linux/property.h>
 
 #include <linux/leds-ti-lmu-common.h>
 
index dfc6fb2..945e831 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
index 64b2d7b..b8a95a9 100644 (file)
@@ -156,24 +156,20 @@ static ssize_t brightness_show(struct device *dev, struct device_attribute *a,
                               char *buf)
 {
        struct i2c_client *client = to_i2c_client(dev);
-       struct omnia_leds *leds = i2c_get_clientdata(client);
        int ret;
 
-       mutex_lock(&leds->lock);
        ret = i2c_smbus_read_byte_data(client, CMD_LED_GET_BRIGHTNESS);
-       mutex_unlock(&leds->lock);
 
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", ret);
+       return sysfs_emit(buf, "%d\n", ret);
 }
 
 static ssize_t brightness_store(struct device *dev, struct device_attribute *a,
                                const char *buf, size_t count)
 {
        struct i2c_client *client = to_i2c_client(dev);
-       struct omnia_leds *leds = i2c_get_clientdata(client);
        unsigned long brightness;
        int ret;
 
@@ -183,15 +179,10 @@ static ssize_t brightness_store(struct device *dev, struct device_attribute *a,
        if (brightness > 100)
                return -EINVAL;
 
-       mutex_lock(&leds->lock);
        ret = i2c_smbus_write_byte_data(client, CMD_LED_SET_BRIGHTNESS,
                                        (u8)brightness);
-       mutex_unlock(&leds->lock);
-
-       if (ret < 0)
-               return ret;
 
-       return count;
+       return ret < 0 ? ret : count;
 }
 static DEVICE_ATTR_RW(brightness);
 
index 360c867..183bccc 100644 (file)
@@ -2,6 +2,18 @@
 
 if LEDS_CLASS_MULTICOLOR
 
+config LEDS_GROUP_MULTICOLOR
+       tristate "LEDs group multi-color support"
+       depends on OF || COMPILE_TEST
+       help
+         This option enables support for monochrome LEDs that are grouped
+         into multicolor LEDs which is useful in the case where LEDs of
+         different colors are physically grouped in a single multi-color LED
+         and driven by a controller that doesn't have multi-color support.
+
+         To compile this driver as a module, choose M here: the module
+         will be called leds-group-multicolor.
+
 config LEDS_PWM_MULTICOLOR
        tristate "PWM driven multi-color LED Support"
        depends on PWM
index 8c01daf..c11cc56 100644 (file)
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
+obj-$(CONFIG_LEDS_GROUP_MULTICOLOR)    += leds-group-multicolor.o
 obj-$(CONFIG_LEDS_PWM_MULTICOLOR)      += leds-pwm-multicolor.o
 obj-$(CONFIG_LEDS_QCOM_LPG)            += leds-qcom-lpg.o
 obj-$(CONFIG_LEDS_MT6370_RGB)          += leds-mt6370-rgb.o
diff --git a/drivers/leds/rgb/leds-group-multicolor.c b/drivers/leds/rgb/leds-group-multicolor.c
new file mode 100644 (file)
index 0000000..39f58be
--- /dev/null
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Multi-color LED built with monochromatic LED devices
+ *
+ * This driver groups several monochromatic LED devices in a single multicolor LED device.
+ *
+ * Compared to handling this grouping in user-space, the benefits are:
+ * - The state of the monochromatic LED relative to each other is always consistent.
+ * - The sysfs interface of the LEDs can be used for the group as a whole.
+ *
+ * Copyright 2023 Jean-Jacques Hiblot <jjhiblot@traphandler.com>
+ */
+
+#include <linux/err.h>
+#include <linux/leds.h>
+#include <linux/led-class-multicolor.h>
+#include <linux/math.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+
+/*
+ * Per-group driver state.
+ *
+ * mc_cdev:        multicolor class device registered for the whole group
+ * monochromatics: array of the grouped LED class devices; entry i pairs with
+ *                 mc_cdev.subled_info[i]
+ */
+struct leds_multicolor {
+       struct led_classdev_mc mc_cdev;
+       struct led_classdev **monochromatics;
+};
+
+/*
+ * Apply a new group brightness to every monochromatic LED of the group.
+ *
+ * Each member is set to
+ *   brightness * intensity * member_max / (group_max * group_max)
+ * rounded to the closest integer, where intensity is the sub-LED intensity
+ * stored in the multicolor class device.
+ *
+ * NOTE(review): the product is evaluated in unsigned int arithmetic; confirm
+ * it cannot overflow for LEDs with a large max_brightness.
+ *
+ * Always returns 0.
+ */
+static int leds_gmc_set(struct led_classdev *cdev, enum led_brightness brightness)
+{
+       struct led_classdev_mc *mc_cdev = lcdev_to_mccdev(cdev);
+       struct leds_multicolor *priv = container_of(mc_cdev, struct leds_multicolor, mc_cdev);
+       const unsigned int group_max = mc_cdev->led_cdev.max_brightness;
+       /* Loop-invariant divisor, shared by all members */
+       const unsigned int divisor = group_max * group_max;
+       int i;
+
+       for (i = 0; i < mc_cdev->num_colors; i++) {
+               struct led_classdev *led = priv->monochromatics[i];
+               const unsigned int led_max = led->max_brightness;
+               unsigned int weight = mc_cdev->subled_info[i].intensity;
+               int scaled = DIV_ROUND_CLOSEST(brightness * weight * led_max, divisor);
+
+               led_set_brightness(led, scaled);
+       }
+
+       return 0;
+}
+
+/*
+ * devm action: make the sysfs interface of one member LED writable again.
+ * @data is the struct led_classdev registered together with this action.
+ */
+static void restore_sysfs_write_access(void *data)
+{
+       struct led_classdev *mono = data;
+
+       /* Same locking as used around led_sysfs_disable() in leds_gmc_probe() */
+       mutex_lock(&mono->led_access);
+       led_sysfs_enable(mono);
+       mutex_unlock(&mono->led_access);
+}
+
+/*
+ * Bind one "leds-group-multicolor" device.
+ *
+ * Collects every monochromatic LED referenced by the device, registers a
+ * single multicolor LED on top of them, applies the initial brightness and
+ * finally makes the members' own sysfs interface read-only until the group
+ * is destroyed.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int leds_gmc_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct led_init_data init_data = {};
+       struct led_classdev *cdev;
+       struct mc_subled *subled;
+       struct leds_multicolor *priv;
+       unsigned int max_brightness = 0;
+       int i, ret, count = 0;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       /* Fetch LEDs by index until the optional lookup returns NULL */
+       for (;;) {
+               struct led_classdev *led_cdev;
+
+               led_cdev = devm_of_led_get_optional(dev, count);
+               if (IS_ERR(led_cdev))
+                       return dev_err_probe(dev, PTR_ERR(led_cdev), "Unable to get LED #%d\n",
+                                            count);
+               if (!led_cdev)
+                       break;
+
+               priv->monochromatics = devm_krealloc_array(dev, priv->monochromatics,
+                                       count + 1, sizeof(*priv->monochromatics),
+                                       GFP_KERNEL);
+               if (!priv->monochromatics)
+                       return -ENOMEM;
+
+               priv->monochromatics[count] = led_cdev;
+
+               /* The group's range is the largest range among its members */
+               max_brightness = max(max_brightness, led_cdev->max_brightness);
+
+               count++;
+       }
+
+       subled = devm_kcalloc(dev, count, sizeof(*subled), GFP_KERNEL);
+       if (!subled)
+               return -ENOMEM;
+       priv->mc_cdev.subled_info = subled;
+
+       for (i = 0; i < count; i++) {
+               struct led_classdev *led_cdev = priv->monochromatics[i];
+
+               subled[i].color_index = led_cdev->color;
+
+               /* Configure the LED intensity to its maximum */
+               subled[i].intensity = max_brightness;
+       }
+
+       /* Initialise the multicolor's LED class device */
+       cdev = &priv->mc_cdev.led_cdev;
+       cdev->flags = LED_CORE_SUSPENDRESUME;
+       cdev->brightness_set_blocking = leds_gmc_set;
+       cdev->max_brightness = max_brightness;
+       cdev->color = LED_COLOR_ID_MULTI;
+       priv->mc_cdev.num_colors = count;
+
+       init_data.fwnode = dev_fwnode(dev);
+       ret = devm_led_classdev_multicolor_register_ext(dev, &priv->mc_cdev, &init_data);
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to register multicolor LED for %s.\n",
+                                    cdev->name);
+
+       /* Push the initial group brightness down to the members */
+       ret = leds_gmc_set(cdev, cdev->brightness);
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to set LED value for %s.\n", cdev->name);
+
+       for (i = 0; i < count; i++) {
+               struct led_classdev *led_cdev = priv->monochromatics[i];
+
+               /*
+                * Make the individual LED sysfs interface read-only to prevent
+                * the user from changing the brightness of the individual LEDs
+                * of the group.
+                */
+               mutex_lock(&led_cdev->led_access);
+               led_sysfs_disable(led_cdev);
+               mutex_unlock(&led_cdev->led_access);
+
+               /*
+                * Restore the write access to the LED sysfs when the group is
+                * destroyed. If registering the action fails it has already
+                * been run for this LED, so only the error needs propagating;
+                * devres then undoes the LEDs handled in earlier iterations.
+                */
+               ret = devm_add_action_or_reset(dev, restore_sysfs_write_access, led_cdev);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/* Devicetree compatible string this driver binds to */
+static const struct of_device_id of_leds_group_multicolor_match[] = {
+       { .compatible = "leds-group-multicolor" },
+       {}
+};
+MODULE_DEVICE_TABLE(of, of_leds_group_multicolor_match);
+
+/* Platform driver registration; matching is done through the OF table. */
+static struct platform_driver leds_group_multicolor_driver = {
+       .driver = {
+               .of_match_table = of_leds_group_multicolor_match,
+               .name = "leds_group_multicolor",
+       },
+       .probe = leds_gmc_probe,
+};
+module_platform_driver(leds_group_multicolor_driver);
+
+MODULE_AUTHOR("Jean-Jacques Hiblot <jjhiblot@traphandler.com>");
+MODULE_DESCRIPTION("LEDs group multicolor driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:leds-group-multicolor");
index 59581b3..df469aa 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/led-class-multicolor.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
@@ -1093,7 +1092,6 @@ static int lpg_add_pwm(struct lpg *lpg)
 {
        int ret;
 
-       lpg->pwm.base = -1;
        lpg->pwm.dev = lpg->dev;
        lpg->pwm.npwm = lpg->num_channels;
        lpg->pwm.ops = &lpg_pwm_ops;
index 609e438..e616cc6 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config LEDS_SIEMENS_SIMATIC_IPC
        tristate "LED driver for Siemens Simatic IPCs"
+       depends on LEDS_CLASS
        depends on SIEMENS_SIMATIC_IPC
        default y
        help
@@ -35,3 +36,16 @@ config LEDS_SIEMENS_SIMATIC_IPC_F7188X
 
          To compile this driver as a module, choose M here: the module
          will be called simatic-ipc-leds-gpio-f7188x.
+
+config LEDS_SIEMENS_SIMATIC_IPC_ELKHARTLAKE
+       tristate "LED driver for Siemens Simatic IPCs based on Intel Elkhart Lake GPIO"
+       depends on LEDS_GPIO
+       depends on PINCTRL_ELKHARTLAKE
+       depends on SIEMENS_SIMATIC_IPC
+       default LEDS_SIEMENS_SIMATIC_IPC
+       help
+         This option enables support for the LEDs of several Industrial PCs
+         from Siemens based on Elkhart Lake GPIO i.e. BX-21A.
+
+         To compile this driver as a module, choose M here: the module
+         will be called simatic-ipc-leds-gpio-elkhartlake.
index e3e840c..783578f 100644 (file)
@@ -2,3 +2,4 @@
 obj-$(CONFIG_LEDS_SIEMENS_SIMATIC_IPC)                 += simatic-ipc-leds.o
 obj-$(CONFIG_LEDS_SIEMENS_SIMATIC_IPC_APOLLOLAKE)      += simatic-ipc-leds-gpio-core.o simatic-ipc-leds-gpio-apollolake.o
 obj-$(CONFIG_LEDS_SIEMENS_SIMATIC_IPC_F7188X)          += simatic-ipc-leds-gpio-core.o simatic-ipc-leds-gpio-f7188x.o
+obj-$(CONFIG_LEDS_SIEMENS_SIMATIC_IPC_ELKHARTLAKE)     += simatic-ipc-leds-gpio-core.o simatic-ipc-leds-gpio-elkhartlake.o
index 2a21b66..c552ea7 100644 (file)
@@ -57,6 +57,7 @@ int simatic_ipc_leds_gpio_probe(struct platform_device *pdev,
        switch (plat->devmode) {
        case SIMATIC_IPC_DEVICE_127E:
        case SIMATIC_IPC_DEVICE_227G:
+       case SIMATIC_IPC_DEVICE_BX_21A:
                break;
        default:
                return -ENODEV;
@@ -72,6 +73,9 @@ int simatic_ipc_leds_gpio_probe(struct platform_device *pdev,
                goto out;
        }
 
+       if (!table_extra)
+               return 0;
+
        table_extra->dev_id = dev_name(dev);
        gpiod_add_lookup_table(table_extra);
 
diff --git a/drivers/leds/simple/simatic-ipc-leds-gpio-elkhartlake.c b/drivers/leds/simple/simatic-ipc-leds-gpio-elkhartlake.c
new file mode 100644 (file)
index 0000000..6ba21db
--- /dev/null
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Siemens SIMATIC IPC driver for GPIO based LEDs
+ *
+ * Copyright (c) Siemens AG, 2023
+ *
+ * Author:
+ *  Henning Schild <henning.schild@siemens.com>
+ */
+
+#include <linux/gpio/machine.h>
+#include <linux/gpio/consumer.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/x86/simatic-ipc-base.h>
+
+#include "simatic-ipc-leds-gpio.h"
+
+/*
+ * GPIO lookup table handed to simatic_ipc_leds_gpio_probe()/_remove():
+ * six active-high lines on chip "INTC1020:04", looked up by index 0..5
+ * (no con_id) for the "leds-gpio" consumer.
+ */
+static struct gpiod_lookup_table simatic_ipc_led_gpio_table = {
+       .dev_id = "leds-gpio",
+       .table = {
+               GPIO_LOOKUP_IDX("INTC1020:04", 72, NULL, 0, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("INTC1020:04", 77, NULL, 1, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("INTC1020:04", 78, NULL, 2, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("INTC1020:04", 58, NULL, 3, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("INTC1020:04", 60, NULL, 4, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("INTC1020:04", 62, NULL, 5, GPIO_ACTIVE_HIGH),
+               {} /* Terminating entry */
+       },
+};
+
+/* Probe: hand this board's lookup table to the shared core, without an extra table. */
+static int simatic_ipc_leds_gpio_elkhartlake_probe(struct platform_device *pdev)
+{
+       struct gpiod_lookup_table *table = &simatic_ipc_led_gpio_table;
+
+       return simatic_ipc_leds_gpio_probe(pdev, table, NULL);
+}
+
+/* Remove: mirror of probe, passing the same lookup table and no extra table. */
+static int simatic_ipc_leds_gpio_elkhartlake_remove(struct platform_device *pdev)
+{
+       struct gpiod_lookup_table *table = &simatic_ipc_led_gpio_table;
+
+       return simatic_ipc_leds_gpio_remove(pdev, table, NULL);
+}
+
+/* Platform driver named after the module (KBUILD_MODNAME); no match table is set. */
+static struct platform_driver simatic_ipc_led_gpio_elkhartlake_driver = {
+       .driver = {
+               .name = KBUILD_MODNAME,
+       },
+       .remove = simatic_ipc_leds_gpio_elkhartlake_remove,
+       .probe = simatic_ipc_leds_gpio_elkhartlake_probe,
+};
+module_platform_driver(simatic_ipc_led_gpio_elkhartlake_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" KBUILD_MODNAME);
+MODULE_SOFTDEP("pre: simatic-ipc-leds-gpio-core platform:elkhartlake-pinctrl");
+MODULE_AUTHOR("Henning Schild <henning.schild@siemens.com>");
index bf258c3..3d4877a 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Siemens SIMATIC IPC driver for GPIO based LEDs
  *
index cc32615..58f3352 100644 (file)
@@ -609,18 +609,7 @@ static struct led_trigger netdev_led_trigger = {
        .groups = netdev_trig_groups,
 };
 
-static int __init netdev_trig_init(void)
-{
-       return led_trigger_register(&netdev_led_trigger);
-}
-
-static void __exit netdev_trig_exit(void)
-{
-       led_trigger_unregister(&netdev_led_trigger);
-}
-
-module_init(netdev_trig_init);
-module_exit(netdev_trig_exit);
+module_led_trigger(netdev_led_trigger);
 
 MODULE_AUTHOR("Ben Whitten <ben.whitten@gmail.com>");
 MODULE_AUTHOR("Oliver Jowett <oliver@opencloud.com>");
index f62db7e..8ae0d2d 100644 (file)
@@ -7,6 +7,8 @@
 #include <linux/tty.h>
 #include <uapi/linux/serial.h>
 
+#define LEDTRIG_TTY_INTERVAL   50
+
 struct ledtrig_tty_data {
        struct led_classdev *led_cdev;
        struct delayed_work dwork;
@@ -122,17 +124,19 @@ static void ledtrig_tty_work(struct work_struct *work)
 
        if (icount.rx != trigger_data->rx ||
            icount.tx != trigger_data->tx) {
-               led_set_brightness_sync(trigger_data->led_cdev, LED_ON);
+               unsigned long interval = LEDTRIG_TTY_INTERVAL;
+
+               led_blink_set_oneshot(trigger_data->led_cdev, &interval,
+                                     &interval, 0);
 
                trigger_data->rx = icount.rx;
                trigger_data->tx = icount.tx;
-       } else {
-               led_set_brightness_sync(trigger_data->led_cdev, LED_OFF);
        }
 
 out:
        mutex_unlock(&trigger_data->mutex);
-       schedule_delayed_work(&trigger_data->dwork, msecs_to_jiffies(100));
+       schedule_delayed_work(&trigger_data->dwork,
+                             msecs_to_jiffies(LEDTRIG_TTY_INTERVAL * 2));
 }
 
 static struct attribute *ledtrig_tty_attrs[] = {
index 7320337..3d361c9 100644 (file)
@@ -209,17 +209,7 @@ static struct miscdevice uleds_misc = {
        .name           = ULEDS_NAME,
 };
 
-static int __init uleds_init(void)
-{
-       return misc_register(&uleds_misc);
-}
-module_init(uleds_init);
-
-static void __exit uleds_exit(void)
-{
-       misc_deregister(&uleds_misc);
-}
-module_exit(uleds_exit);
+module_misc_device(uleds_misc);
 
 MODULE_AUTHOR("David Lechner <david@lechnology.com>");
 MODULE_DESCRIPTION("Userspace driver for the LED subsystem");
index 22243ca..537f7bf 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/io.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 #define INTR_STAT_OFS  0x0
 #define INTR_SET_OFS   0x8
index aa0a4d8..27a510d 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #define INTR_STAT_OFS  0x0
 #define INTR_SET_OFS   0x8
index bf6e86b..a2b8839 100644 (file)
@@ -1501,16 +1501,12 @@ static int flexrm_mbox_probe(struct platform_device *pdev)
        mbox->dev = dev;
        platform_set_drvdata(pdev, mbox);
 
-       /* Get resource for registers */
-       iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       /* Get resource for registers and map registers of all rings */
+       mbox->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &iomem);
        if (!iomem || (resource_size(iomem) < RING_REGS_SIZE)) {
                ret = -ENODEV;
                goto fail;
-       }
-
-       /* Map registers of all rings */
-       mbox->regs = devm_ioremap_resource(&pdev->dev, iomem);
-       if (IS_ERR(mbox->regs)) {
+       } else if (IS_ERR(mbox->regs)) {
                ret = PTR_ERR(mbox->regs);
                goto fail;
        }
index 8c95e3c..d67db63 100644 (file)
@@ -694,7 +694,7 @@ pdc_receive(struct pdc_state *pdcs)
  * pdc_tx_list_sg_add() - Add the buffers in a scatterlist to the transmit
  * descriptors for a given SPU. The scatterlist buffers contain the data for a
  * SPU request message.
- * @spu_idx:   The index of the SPU to submit the request to, [0, max_spu)
+ * @pdcs:      PDC state for the SPU that will process this request
  * @sg:        Scatterlist whose buffers contain part of the SPU request
  *
  * If a scatterlist buffer is larger than PDC_DMA_BUF_MAX, multiple descriptors
@@ -861,7 +861,7 @@ static int pdc_rx_list_init(struct pdc_state *pdcs, struct scatterlist *dst_sg,
  * pdc_rx_list_sg_add() - Add the buffers in a scatterlist to the receive
  * descriptors for a given SPU. The caller must have already DMA mapped the
  * scatterlist.
- * @spu_idx:    Indicates which SPU the buffers are for
+ * @pdcs:       PDC state for the SPU that will process this request
  * @sg:         Scatterlist whose buffers are added to the receive ring
  *
  * If a receive buffer in the scatterlist is larger than PDC_DMA_BUF_MAX,
@@ -960,7 +960,7 @@ static irqreturn_t pdc_irq_handler(int irq, void *data)
 /**
  * pdc_tasklet_cb() - Tasklet callback that runs the deferred processing after
  * a DMA receive interrupt. Reenables the receive interrupt.
- * @data: PDC state structure
+ * @t: Tasklet that triggered this callback, from which the PDC state is derived
  */
 static void pdc_tasklet_cb(struct tasklet_struct *t)
 {
@@ -1566,19 +1566,13 @@ static int pdc_probe(struct platform_device *pdev)
        if (err)
                goto cleanup_ring_pool;
 
-       pdc_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!pdc_regs) {
-               err = -ENODEV;
-               goto cleanup_ring_pool;
-       }
-       dev_dbg(dev, "PDC register region res.start = %pa, res.end = %pa",
-               &pdc_regs->start, &pdc_regs->end);
-
-       pdcs->pdc_reg_vbase = devm_ioremap_resource(&pdev->dev, pdc_regs);
+       pdcs->pdc_reg_vbase = devm_platform_get_and_ioremap_resource(pdev, 0, &pdc_regs);
        if (IS_ERR(pdcs->pdc_reg_vbase)) {
                err = PTR_ERR(pdcs->pdc_reg_vbase);
                goto cleanup_ring_pool;
        }
+       dev_dbg(dev, "PDC register region res.start = %pa, res.end = %pa",
+               &pdc_regs->start, &pdc_regs->end);
 
        /* create rx buffer pool after dt read to know how big buffers are */
        err = pdc_rx_buf_pool_create(pdcs);
index ab24e73..17c29e9 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/iopoll.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
index 1c73c63..f77741c 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/kfifo.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
index 20f2ec8..3ef4dd8 100644 (file)
@@ -14,7 +14,8 @@
 #include <linux/kernel.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/suspend.h>
 #include <linux/slab.h>
index 162df49..20ee283 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/mailbox_controller.h>
 #include <soc/microchip/mpfs.h>
index fc6a12a..22d6018 100644 (file)
@@ -367,8 +367,7 @@ static int mbox_test_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        /* It's okay for MMIO to be NULL */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       tdev->tx_mmio = devm_ioremap_resource(&pdev->dev, res);
+       tdev->tx_mmio = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (PTR_ERR(tdev->tx_mmio) == -EBUSY) {
                /* if reserved area in SRAM, try just ioremap */
                size = resource_size(res);
@@ -378,8 +377,7 @@ static int mbox_test_probe(struct platform_device *pdev)
        }
 
        /* If specified, second reg entry is Rx MMIO */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       tdev->rx_mmio = devm_ioremap_resource(&pdev->dev, res);
+       tdev->rx_mmio = devm_platform_get_and_ioremap_resource(pdev, 1, &res);
        if (PTR_ERR(tdev->rx_mmio) == -EBUSY) {
                size = resource_size(res);
                tdev->rx_mmio = devm_ioremap(&pdev->dev, res->start, size);
@@ -390,7 +388,7 @@ static int mbox_test_probe(struct platform_device *pdev)
        tdev->tx_channel = mbox_test_request_channel(pdev, "tx");
        tdev->rx_channel = mbox_test_request_channel(pdev, "rx");
 
-       if (!tdev->tx_channel && !tdev->rx_channel)
+       if (IS_ERR_OR_NULL(tdev->tx_channel) && IS_ERR_OR_NULL(tdev->rx_channel))
                return -EPROBE_DEFER;
 
        /* If Rx is not specified but has Rx MMIO, then Rx = Tx */
index adf36c0..ebff3ba 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/bitops.h>
 #include <linux/mailbox_client.h>
 #include <linux/mailbox_controller.h>
+#include <linux/of.h>
 
 #include "mailbox.h"
 
index 14bc005..91487aa 100644 (file)
@@ -10,7 +10,8 @@
 #include <linux/kernel.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 struct mtk_adsp_mbox_priv {
index b18d47e..4d62b07 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/mailbox_controller.h>
 #include <linux/mailbox/mtk-cmdq-mailbox.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #define CMDQ_OP_CODE_MASK              (0xff << CMDQ_OP_CODE_SHIFT)
 #define CMDQ_NUM_CMD(t)                        (t->cmd_buf_size / CMDQ_INST_SIZE)
index fa2ce32..792bcae 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/kfifo.h>
 #include <linux/err.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/omap-mailbox.h>
index a5922ac..834aecd 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/mailbox_controller.h>
@@ -135,10 +136,8 @@ static int platform_mhu_probe(struct platform_device *pdev)
        for (i = 0; i < MHU_CHANS; i++) {
                mhu->chan[i].con_priv = &mhu->mlink[i];
                mhu->mlink[i].irq = platform_get_irq(pdev, i);
-               if (mhu->mlink[i].irq < 0) {
-                       dev_err(dev, "failed to get irq%d\n", i);
+               if (mhu->mlink[i].irq < 0)
                        return mhu->mlink[i].irq;
-               }
                mhu->mlink[i].rx_reg = mhu->base + platform_mhu_reg[i];
                mhu->mlink[i].tx_reg = mhu->mlink[i].rx_reg + TX_REG_OFFSET;
        }
index 7e27acf..f597a1b 100644 (file)
@@ -227,10 +227,8 @@ static int qcom_ipcc_setup_mbox(struct qcom_ipcc *ipcc,
                        ret = of_parse_phandle_with_args(client_dn, "mboxes",
                                                "#mbox-cells", j, &curr_ph);
                        of_node_put(curr_ph.np);
-                       if (!ret && curr_ph.np == controller_dn) {
+                       if (!ret && curr_ph.np == controller_dn)
                                ipcc->num_chans++;
-                               break;
-                       }
                }
        }
 
index 116286e..8ffad05 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/mailbox_controller.h>
+#include <linux/of.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 #define MAILBOX_A2B_INTEN              0x00
@@ -194,11 +194,7 @@ static int rockchip_mbox_probe(struct platform_device *pdev)
        mb->mbox.ops = &rockchip_mbox_chan_ops;
        mb->mbox.txdone_irq = true;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
-       mb->mbox_base = devm_ioremap_resource(&pdev->dev, res);
+       mb->mbox_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(mb->mbox_base))
                return PTR_ERR(mb->mbox_base);
 
index e3c899a..9ae57de 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/io.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 
index 15d538f..4ad3653 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/io.h>
 #include <linux/mailbox_controller.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 
index 7f98e74..fe29fc2 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/io.h>
 #include <linux/mailbox_controller.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
@@ -728,7 +727,6 @@ static int tegra_hsp_request_shared_irq(struct tegra_hsp *hsp)
 static int tegra_hsp_probe(struct platform_device *pdev)
 {
        struct tegra_hsp *hsp;
-       struct resource *res;
        unsigned int i;
        u32 value;
        int err;
@@ -742,8 +740,7 @@ static int tegra_hsp_probe(struct platform_device *pdev)
        INIT_LIST_HEAD(&hsp->doorbells);
        spin_lock_init(&hsp->lock);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       hsp->regs = devm_ioremap_resource(&pdev->dev, res);
+       hsp->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(hsp->regs))
                return PTR_ERR(hsp->regs);
 
index 03048cb..a94577f 100644 (file)
@@ -812,7 +812,6 @@ static int ti_msgmgr_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        const struct of_device_id *of_id;
        struct device_node *np;
-       struct resource *res;
        const struct ti_msgmgr_desc *desc;
        struct ti_msgmgr_inst *inst;
        struct ti_queue_inst *qinst;
@@ -843,22 +842,19 @@ static int ti_msgmgr_probe(struct platform_device *pdev)
        inst->dev = dev;
        inst->desc = desc;
 
-       res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-                                          desc->data_region_name);
-       inst->queue_proxy_region = devm_ioremap_resource(dev, res);
+       inst->queue_proxy_region =
+               devm_platform_ioremap_resource_byname(pdev, desc->data_region_name);
        if (IS_ERR(inst->queue_proxy_region))
                return PTR_ERR(inst->queue_proxy_region);
 
-       res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-                                          desc->status_region_name);
-       inst->queue_state_debug_region = devm_ioremap_resource(dev, res);
+       inst->queue_state_debug_region =
+               devm_platform_ioremap_resource_byname(pdev, desc->status_region_name);
        if (IS_ERR(inst->queue_state_debug_region))
                return PTR_ERR(inst->queue_state_debug_region);
 
        if (desc->is_sproxy) {
-               res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-                                                  desc->ctrl_region_name);
-               inst->queue_ctrl_region = devm_ioremap_resource(dev, res);
+               inst->queue_ctrl_region =
+                       devm_platform_ioremap_resource_byname(pdev, desc->ctrl_region_name);
                if (IS_ERR(inst->queue_ctrl_region))
                        return PTR_ERR(inst->queue_ctrl_region);
        }
index d097f45..e4fcac9 100644 (file)
@@ -16,8 +16,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 
 /* IPI agent ID any */
index 9b00b56..cf8e5f1 100644 (file)
@@ -533,7 +533,7 @@ struct dvb_frontend *ascot2e_attach(struct dvb_frontend *fe,
                priv->i2c_address, priv->i2c);
        return fe;
 }
-EXPORT_SYMBOL(ascot2e_attach);
+EXPORT_SYMBOL_GPL(ascot2e_attach);
 
 MODULE_DESCRIPTION("Sony ASCOT2E terr/cab tuner driver");
 MODULE_AUTHOR("info@netup.ru");
index bdd16b9..778c865 100644 (file)
@@ -489,7 +489,7 @@ error_out:
        return NULL;
 
 }
-EXPORT_SYMBOL(atbm8830_attach);
+EXPORT_SYMBOL_GPL(atbm8830_attach);
 
 MODULE_DESCRIPTION("AltoBeam ATBM8830/8831 GB20600 demodulator driver");
 MODULE_AUTHOR("David T. L. Wong <davidtlwong@gmail.com>");
index 78cafdf..230436b 100644 (file)
@@ -879,7 +879,7 @@ error:
        au8522_release_state(state);
        return NULL;
 }
-EXPORT_SYMBOL(au8522_attach);
+EXPORT_SYMBOL_GPL(au8522_attach);
 
 static const struct dvb_frontend_ops au8522_ops = {
        .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
index 68b92b4..b3f5c49 100644 (file)
@@ -835,7 +835,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(bcm3510_attach);
+EXPORT_SYMBOL_GPL(bcm3510_attach);
 
 static const struct dvb_frontend_ops bcm3510_ops = {
        .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
index b39ff51..1d04c0a 100644 (file)
@@ -432,4 +432,4 @@ MODULE_DESCRIPTION("Conexant CX22700 DVB-T Demodulator driver");
 MODULE_AUTHOR("Holger Waechtler");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(cx22700_attach);
+EXPORT_SYMBOL_GPL(cx22700_attach);
index cc6acbf..61ad34b 100644 (file)
@@ -604,7 +604,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(cx22702_attach);
+EXPORT_SYMBOL_GPL(cx22702_attach);
 
 static const struct dvb_frontend_ops cx22702_ops = {
        .delsys = { SYS_DVBT },
index 6f99d6a..9aeea08 100644 (file)
@@ -653,4 +653,4 @@ MODULE_DESCRIPTION("Conexant CX24110 DVB-S Demodulator driver");
 MODULE_AUTHOR("Peter Hettkamp");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(cx24110_attach);
+EXPORT_SYMBOL_GPL(cx24110_attach);
index dd55d31..203cb6b 100644 (file)
@@ -590,7 +590,7 @@ error:
 
        return NULL;
 }
-EXPORT_SYMBOL(cx24113_attach);
+EXPORT_SYMBOL_GPL(cx24113_attach);
 
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Activates frontend debugging (default:0)");
index ea8264c..8b978a9 100644 (file)
@@ -1133,7 +1133,7 @@ struct dvb_frontend *cx24116_attach(const struct cx24116_config *config,
        state->frontend.demodulator_priv = state;
        return &state->frontend;
 }
-EXPORT_SYMBOL(cx24116_attach);
+EXPORT_SYMBOL_GPL(cx24116_attach);
 
 /*
  * Initialise or wake up device
index 0f77866..44515fd 100644 (file)
@@ -305,7 +305,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(cx24120_attach);
+EXPORT_SYMBOL_GPL(cx24120_attach);
 
 static int cx24120_test_rom(struct cx24120_state *state)
 {
index 3d84ee1..539889e 100644 (file)
@@ -1096,7 +1096,7 @@ error:
 
        return NULL;
 }
-EXPORT_SYMBOL(cx24123_attach);
+EXPORT_SYMBOL_GPL(cx24123_attach);
 
 static const struct dvb_frontend_ops cx24123_ops = {
        .delsys = { SYS_DVBS },
index d7ee294..7feb08d 100644 (file)
@@ -536,7 +536,7 @@ struct dvb_frontend *cxd2820r_attach(const struct cxd2820r_config *config,
 
        return pdata.get_dvb_frontend(client);
 }
-EXPORT_SYMBOL(cxd2820r_attach);
+EXPORT_SYMBOL_GPL(cxd2820r_attach);
 
 static struct dvb_frontend *cxd2820r_get_dvb_frontend(struct i2c_client *client)
 {
index ef403a9..d925ca2 100644 (file)
@@ -3930,14 +3930,14 @@ struct dvb_frontend *cxd2841er_attach_s(struct cxd2841er_config *cfg,
 {
        return cxd2841er_attach(cfg, i2c, SYS_DVBS);
 }
-EXPORT_SYMBOL(cxd2841er_attach_s);
+EXPORT_SYMBOL_GPL(cxd2841er_attach_s);
 
 struct dvb_frontend *cxd2841er_attach_t_c(struct cxd2841er_config *cfg,
                                        struct i2c_adapter *i2c)
 {
        return cxd2841er_attach(cfg, i2c, 0);
 }
-EXPORT_SYMBOL(cxd2841er_attach_t_c);
+EXPORT_SYMBOL_GPL(cxd2841er_attach_t_c);
 
 static const struct dvb_frontend_ops cxd2841er_dvbs_s2_ops = {
        .delsys = { SYS_DVBS, SYS_DVBS2 },
index f67b6d2..a06d836 100644 (file)
@@ -1950,7 +1950,7 @@ struct dvb_frontend *cxd2880_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(cxd2880_attach);
+EXPORT_SYMBOL_GPL(cxd2880_attach);
 
 MODULE_DESCRIPTION("Sony CXD2880 DVB-T2/T tuner + demod driver");
 MODULE_AUTHOR("Sony Semiconductor Solutions Corporation");
index cafb41d..9a8e7cd 100644 (file)
@@ -762,7 +762,7 @@ free_mem:
        fe->tuner_priv = NULL;
        return NULL;
 }
-EXPORT_SYMBOL(dib0070_attach);
+EXPORT_SYMBOL_GPL(dib0070_attach);
 
 MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>");
 MODULE_DESCRIPTION("Driver for the DiBcom 0070 base-band RF Tuner");
index 903da33..c958bcf 100644 (file)
@@ -2634,7 +2634,7 @@ struct dvb_frontend *dib0090_register(struct dvb_frontend *fe, struct i2c_adapte
        return NULL;
 }
 
-EXPORT_SYMBOL(dib0090_register);
+EXPORT_SYMBOL_GPL(dib0090_register);
 
 struct dvb_frontend *dib0090_fw_register(struct dvb_frontend *fe, struct i2c_adapter *i2c, const struct dib0090_config *config)
 {
@@ -2660,7 +2660,7 @@ free_mem:
        fe->tuner_priv = NULL;
        return NULL;
 }
-EXPORT_SYMBOL(dib0090_fw_register);
+EXPORT_SYMBOL_GPL(dib0090_fw_register);
 
 MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>");
 MODULE_AUTHOR("Olivier Grenie <olivier.grenie@parrot.com>");
index a6c2fc4..c598b2a 100644 (file)
@@ -815,4 +815,4 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(dib3000mb_attach);
+EXPORT_SYMBOL_GPL(dib3000mb_attach);
index 2e11a24..c2fca82 100644 (file)
@@ -935,7 +935,7 @@ error:
        kfree(st);
        return NULL;
 }
-EXPORT_SYMBOL(dib3000mc_attach);
+EXPORT_SYMBOL_GPL(dib3000mc_attach);
 
 static const struct dvb_frontend_ops dib3000mc_ops = {
        .delsys = { SYS_DVBT },
index 97ce977..fdb22f3 100644 (file)
@@ -1434,7 +1434,7 @@ error:
        kfree(st);
        return NULL;
 }
-EXPORT_SYMBOL(dib7000m_attach);
+EXPORT_SYMBOL_GPL(dib7000m_attach);
 
 static const struct dvb_frontend_ops dib7000m_ops = {
        .delsys = { SYS_DVBT },
index 9273758..444fe1c 100644 (file)
@@ -2822,7 +2822,7 @@ void *dib7000p_attach(struct dib7000p_ops *ops)
 
        return ops;
 }
-EXPORT_SYMBOL(dib7000p_attach);
+EXPORT_SYMBOL_GPL(dib7000p_attach);
 
 static const struct dvb_frontend_ops dib7000p_ops = {
        .delsys = { SYS_DVBT },
index 2abda7d..2f51659 100644 (file)
@@ -4527,7 +4527,7 @@ void *dib8000_attach(struct dib8000_ops *ops)
 
        return ops;
 }
-EXPORT_SYMBOL(dib8000_attach);
+EXPORT_SYMBOL_GPL(dib8000_attach);
 
 MODULE_AUTHOR("Olivier Grenie <Olivier.Grenie@parrot.com, Patrick Boettcher <patrick.boettcher@posteo.de>");
 MODULE_DESCRIPTION("Driver for the DiBcom 8000 ISDB-T demodulator");
index 1c57587..83cf6ea 100644 (file)
@@ -2546,7 +2546,7 @@ error:
        kfree(st);
        return NULL;
 }
-EXPORT_SYMBOL(dib9000_attach);
+EXPORT_SYMBOL_GPL(dib9000_attach);
 
 static const struct dvb_frontend_ops dib9000_ops = {
        .delsys = { SYS_DVBT },
index 68f4e8b..a738573 100644 (file)
@@ -12372,7 +12372,7 @@ error:
 
        return NULL;
 }
-EXPORT_SYMBOL(drx39xxj_attach);
+EXPORT_SYMBOL_GPL(drx39xxj_attach);
 
 static const struct dvb_frontend_ops drx39xxj_ops = {
        .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
index 9860cae..6a53193 100644 (file)
@@ -2939,7 +2939,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(drxd_attach);
+EXPORT_SYMBOL_GPL(drxd_attach);
 
 MODULE_DESCRIPTION("DRXD driver");
 MODULE_AUTHOR("Micronas");
index 2770bae..87f3d4f 100644 (file)
@@ -6814,7 +6814,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(drxk_attach);
+EXPORT_SYMBOL_GPL(drxk_attach);
 
 MODULE_DESCRIPTION("DRX-K driver");
 MODULE_AUTHOR("Ralph Metzler");
index 20fcf31..515aa7c 100644 (file)
@@ -859,7 +859,7 @@ struct dvb_frontend *ds3000_attach(const struct ds3000_config *config,
        ds3000_set_voltage(&state->frontend, SEC_VOLTAGE_OFF);
        return &state->frontend;
 }
-EXPORT_SYMBOL(ds3000_attach);
+EXPORT_SYMBOL_GPL(ds3000_attach);
 
 static int ds3000_set_carrier_offset(struct dvb_frontend *fe,
                                        s32 carrier_offset_khz)
index 90cb41e..ef697ab 100644 (file)
@@ -866,7 +866,7 @@ out:
 
        return NULL;
 }
-EXPORT_SYMBOL(dvb_pll_attach);
+EXPORT_SYMBOL_GPL(dvb_pll_attach);
 
 
 static int
index 03bd806..2ad0a3c 100644 (file)
@@ -299,7 +299,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(ec100_attach);
+EXPORT_SYMBOL_GPL(ec100_attach);
 
 static const struct dvb_frontend_ops ec100_ops = {
        .delsys = { SYS_DVBT },
index 68c1a3e..f127ade 100644 (file)
@@ -1025,7 +1025,7 @@ struct dvb_frontend *helene_attach_s(struct dvb_frontend *fe,
                        priv->i2c_address, priv->i2c);
        return fe;
 }
-EXPORT_SYMBOL(helene_attach_s);
+EXPORT_SYMBOL_GPL(helene_attach_s);
 
 struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
                const struct helene_config *config,
@@ -1061,7 +1061,7 @@ struct dvb_frontend *helene_attach(struct dvb_frontend *fe,
                        priv->i2c_address, priv->i2c);
        return fe;
 }
-EXPORT_SYMBOL(helene_attach);
+EXPORT_SYMBOL_GPL(helene_attach);
 
 static int helene_probe(struct i2c_client *client)
 {
index 24bf5cb..0330b78 100644 (file)
@@ -395,7 +395,7 @@ struct dvb_frontend *horus3a_attach(struct dvb_frontend *fe,
                priv->i2c_address, priv->i2c);
        return fe;
 }
-EXPORT_SYMBOL(horus3a_attach);
+EXPORT_SYMBOL_GPL(horus3a_attach);
 
 MODULE_DESCRIPTION("Sony HORUS3A satellite tuner driver");
 MODULE_AUTHOR("Sergey Kozlov <serjk@netup.ru>");
index 2cd69b4..7d28a74 100644 (file)
@@ -141,7 +141,7 @@ struct dvb_frontend *isl6405_attach(struct dvb_frontend *fe, struct i2c_adapter
 
        return fe;
 }
-EXPORT_SYMBOL(isl6405_attach);
+EXPORT_SYMBOL_GPL(isl6405_attach);
 
 MODULE_DESCRIPTION("Driver for lnb supply and control ic isl6405");
 MODULE_AUTHOR("Hartmut Hackmann & Oliver Endriss");
index 43b0dfc..2e9f6f1 100644 (file)
@@ -213,7 +213,7 @@ struct dvb_frontend *isl6421_attach(struct dvb_frontend *fe, struct i2c_adapter
 
        return fe;
 }
-EXPORT_SYMBOL(isl6421_attach);
+EXPORT_SYMBOL_GPL(isl6421_attach);
 
 MODULE_DESCRIPTION("Driver for lnb supply and control ic isl6421");
 MODULE_AUTHOR("Andrew de Quincey & Oliver Endriss");
index 8cd1bb8..a0d0a38 100644 (file)
@@ -289,7 +289,7 @@ exit:
        fe->sec_priv = NULL;
        return NULL;
 }
-EXPORT_SYMBOL(isl6423_attach);
+EXPORT_SYMBOL_GPL(isl6423_attach);
 
 MODULE_DESCRIPTION("ISL6423 SEC");
 MODULE_AUTHOR("Manu Abraham");
index 1b33478..f8f362f 100644 (file)
@@ -389,7 +389,7 @@ struct dvb_frontend *itd1000_attach(struct dvb_frontend *fe, struct i2c_adapter
 
        return fe;
 }
-EXPORT_SYMBOL(itd1000_attach);
+EXPORT_SYMBOL_GPL(itd1000_attach);
 
 MODULE_AUTHOR("Patrick Boettcher <pb@linuxtv.org>");
 MODULE_DESCRIPTION("Integrant ITD1000 driver");
index 73f2710..3212e33 100644 (file)
@@ -302,7 +302,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(ix2505v_attach);
+EXPORT_SYMBOL_GPL(ix2505v_attach);
 
 module_param_named(debug, ix2505v_debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
index c5106a1..fe5af24 100644 (file)
@@ -593,4 +593,4 @@ MODULE_DESCRIPTION("LSI L64781 DVB-T Demodulator driver");
 MODULE_AUTHOR("Holger Waechtler, Marko Kohtala");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(l64781_attach);
+EXPORT_SYMBOL_GPL(l64781_attach);
index f343066..fe700aa 100644 (file)
@@ -1426,7 +1426,7 @@ struct dvb_frontend *lg2160_attach(const struct lg2160_config *config,
 
        return &state->frontend;
 }
-EXPORT_SYMBOL(lg2160_attach);
+EXPORT_SYMBOL_GPL(lg2160_attach);
 
 MODULE_DESCRIPTION("LG Electronics LG216x ATSC/MH Demodulator Driver");
 MODULE_AUTHOR("Michael Krufky <mkrufky@linuxtv.org>");
index c15d373..bdc8311 100644 (file)
@@ -1148,7 +1148,7 @@ fail:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(lgdt3305_attach);
+EXPORT_SYMBOL_GPL(lgdt3305_attach);
 
 static const struct dvb_frontend_ops lgdt3304_ops = {
        .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
index 3c6650f..2638875 100644 (file)
@@ -1859,7 +1859,7 @@ fail:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(lgdt3306a_attach);
+EXPORT_SYMBOL_GPL(lgdt3306a_attach);
 
 #ifdef DBG_DUMP
 
index 97a1099..081d6ad 100644 (file)
@@ -927,7 +927,7 @@ struct dvb_frontend *lgdt330x_attach(const struct lgdt330x_config *_config,
 
        return lgdt330x_get_dvb_frontend(client);
 }
-EXPORT_SYMBOL(lgdt330x_attach);
+EXPORT_SYMBOL_GPL(lgdt330x_attach);
 
 static const struct dvb_frontend_ops lgdt3302_ops = {
        .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
index 3001497..ffaf60e 100644 (file)
@@ -1043,7 +1043,7 @@ error_out:
        return NULL;
 
 }
-EXPORT_SYMBOL(lgs8gxx_attach);
+EXPORT_SYMBOL_GPL(lgs8gxx_attach);
 
 MODULE_DESCRIPTION("Legend Silicon LGS8913/LGS8GXX DMB-TH demodulator driver");
 MODULE_AUTHOR("David T. L. Wong <davidtlwong@gmail.com>");
index 9ffe06c..41bec05 100644 (file)
@@ -173,7 +173,7 @@ struct dvb_frontend *lnbh25_attach(struct dvb_frontend *fe,
                __func__, priv->i2c_address);
        return fe;
 }
-EXPORT_SYMBOL(lnbh25_attach);
+EXPORT_SYMBOL_GPL(lnbh25_attach);
 
 MODULE_DESCRIPTION("ST LNBH25 driver");
 MODULE_AUTHOR("info@netup.ru");
index e564974..32593b1 100644 (file)
@@ -155,7 +155,7 @@ struct dvb_frontend *lnbh24_attach(struct dvb_frontend *fe,
        return lnbx2x_attach(fe, i2c, override_set, override_clear,
                                                        i2c_addr, LNBH24_TTX);
 }
-EXPORT_SYMBOL(lnbh24_attach);
+EXPORT_SYMBOL_GPL(lnbh24_attach);
 
 struct dvb_frontend *lnbp21_attach(struct dvb_frontend *fe,
                                struct i2c_adapter *i2c, u8 override_set,
@@ -164,7 +164,7 @@ struct dvb_frontend *lnbp21_attach(struct dvb_frontend *fe,
        return lnbx2x_attach(fe, i2c, override_set, override_clear,
                                                        0x08, LNBP21_ISEL);
 }
-EXPORT_SYMBOL(lnbp21_attach);
+EXPORT_SYMBOL_GPL(lnbp21_attach);
 
 MODULE_DESCRIPTION("Driver for lnb supply and control ic lnbp21, lnbh24");
 MODULE_AUTHOR("Oliver Endriss, Igor M. Liplianin");
index b8c7145..cb4ea5d 100644 (file)
@@ -125,7 +125,7 @@ struct dvb_frontend *lnbp22_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(lnbp22_attach);
+EXPORT_SYMBOL_GPL(lnbp22_attach);
 
 MODULE_DESCRIPTION("Driver for lnb supply and control ic lnbp22");
 MODULE_AUTHOR("Dominik Kuhlen");
index cf49ac5..cf037b6 100644 (file)
@@ -1695,7 +1695,7 @@ struct dvb_frontend *m88ds3103_attach(const struct m88ds3103_config *cfg,
        *tuner_i2c_adapter = pdata.get_i2c_adapter(client);
        return pdata.get_dvb_frontend(client);
 }
-EXPORT_SYMBOL(m88ds3103_attach);
+EXPORT_SYMBOL_GPL(m88ds3103_attach);
 
 static const struct dvb_frontend_ops m88ds3103_ops = {
        .delsys = {SYS_DVBS, SYS_DVBS2},
index b294ba8..2aa9820 100644 (file)
@@ -808,7 +808,7 @@ error:
 
        return NULL;
 }
-EXPORT_SYMBOL(m88rs2000_attach);
+EXPORT_SYMBOL_GPL(m88rs2000_attach);
 
 MODULE_DESCRIPTION("M88RS2000 DVB-S Demodulator driver");
 MODULE_AUTHOR("Malcolm Priestley tvboxspy@gmail.com");
index 3ec2cb4..0fc4589 100644 (file)
@@ -1853,6 +1853,6 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(mb86a16_attach);
+EXPORT_SYMBOL_GPL(mb86a16_attach);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Manu Abraham");
index 125fed4..f8e4bbe 100644 (file)
@@ -2078,7 +2078,7 @@ struct dvb_frontend *mb86a20s_attach(const struct mb86a20s_config *config,
        dev_info(&i2c->dev, "Detected a Fujitsu mb86a20s frontend\n");
        return &state->frontend;
 }
-EXPORT_SYMBOL(mb86a20s_attach);
+EXPORT_SYMBOL_GPL(mb86a20s_attach);
 
 static const struct dvb_frontend_ops mb86a20s_ops = {
        .delsys = { SYS_ISDBT },
index d43a670..fb867dd 100644 (file)
@@ -827,7 +827,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(mt312_attach);
+EXPORT_SYMBOL_GPL(mt312_attach);
 
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
index 399d5c5..1b2889f 100644 (file)
@@ -593,4 +593,4 @@ MODULE_DESCRIPTION("Zarlink MT352 DVB-T Demodulator driver");
 MODULE_AUTHOR("Holger Waechtler, Daniel Mack, Antonio Mancuso");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(mt352_attach);
+EXPORT_SYMBOL_GPL(mt352_attach);
index 200b6db..1c549ad 100644 (file)
@@ -1216,5 +1216,5 @@ MODULE_DESCRIPTION("NXT200X (ATSC 8VSB & ITU-T J.83 AnnexB 64/256 QAM) Demodulat
 MODULE_AUTHOR("Kirk Lapray, Michael Krufky, Jean-Francois Thibert, and Taylor Jacob");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(nxt200x_attach);
+EXPORT_SYMBOL_GPL(nxt200x_attach);
 
index 136918f..e8d4940 100644 (file)
@@ -621,4 +621,4 @@ MODULE_DESCRIPTION("NxtWave NXT6000 DVB-T demodulator driver");
 MODULE_AUTHOR("Florian Schirmer");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(nxt6000_attach);
+EXPORT_SYMBOL_GPL(nxt6000_attach);
index 355f359..74e04c7 100644 (file)
@@ -605,4 +605,4 @@ MODULE_AUTHOR("Kirk Lapray");
 MODULE_AUTHOR("Trent Piepho");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(or51132_attach);
+EXPORT_SYMBOL_GPL(or51132_attach);
index ae732dc..2e8e707 100644 (file)
@@ -551,5 +551,5 @@ MODULE_DESCRIPTION("Oren OR51211 VSB [pcHDTV HD-2000] Demodulator Driver");
 MODULE_AUTHOR("Kirk Lapray");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(or51211_attach);
+EXPORT_SYMBOL_GPL(or51211_attach);
 
index 3089cc1..28b1dca 100644 (file)
@@ -981,7 +981,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(s5h1409_attach);
+EXPORT_SYMBOL_GPL(s5h1409_attach);
 
 static const struct dvb_frontend_ops s5h1409_ops = {
        .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
index 2563a72..fc48e65 100644 (file)
@@ -900,7 +900,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(s5h1411_attach);
+EXPORT_SYMBOL_GPL(s5h1411_attach);
 
 static const struct dvb_frontend_ops s5h1411_ops = {
        .delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
index 6bdec28..d700de1 100644 (file)
@@ -918,7 +918,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(s5h1420_attach);
+EXPORT_SYMBOL_GPL(s5h1420_attach);
 
 static const struct dvb_frontend_ops s5h1420_ops = {
        .delsys = { SYS_DVBS },
index 956e8ee..ff5d3bd 100644 (file)
@@ -355,7 +355,7 @@ struct dvb_frontend *s5h1432_attach(const struct s5h1432_config *config,
 
        return &state->frontend;
 }
-EXPORT_SYMBOL(s5h1432_attach);
+EXPORT_SYMBOL_GPL(s5h1432_attach);
 
 static const struct dvb_frontend_ops s5h1432_ops = {
        .delsys = { SYS_DVBT },
index f118d8e..7e461ac 100644 (file)
@@ -495,7 +495,7 @@ struct dvb_frontend *s921_attach(const struct s921_config *config,
 
        return &state->frontend;
 }
-EXPORT_SYMBOL(s921_attach);
+EXPORT_SYMBOL_GPL(s921_attach);
 
 static const struct dvb_frontend_ops s921_ops = {
        .delsys = { SYS_ISDBT },
index 2d29d2c..210ccd3 100644 (file)
@@ -937,7 +937,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(si21xx_attach);
+EXPORT_SYMBOL_GPL(si21xx_attach);
 
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
index 146e7f2..f59c0f9 100644 (file)
@@ -624,4 +624,4 @@ MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
 MODULE_DESCRIPTION("Spase sp887x DVB-T demodulator driver");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(sp887x_attach);
+EXPORT_SYMBOL_GPL(sp887x_attach);
index 4ee6c1e..2f4d8fb 100644 (file)
@@ -1638,7 +1638,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(stb0899_attach);
+EXPORT_SYMBOL_GPL(stb0899_attach);
 MODULE_PARM_DESC(verbose, "Set Verbosity level");
 MODULE_AUTHOR("Manu Abraham");
 MODULE_DESCRIPTION("STB0899 Multi-Std frontend");
index 8c9800d..d74e346 100644 (file)
@@ -232,7 +232,7 @@ struct dvb_frontend *stb6000_attach(struct dvb_frontend *fe, int addr,
 
        return fe;
 }
-EXPORT_SYMBOL(stb6000_attach);
+EXPORT_SYMBOL_GPL(stb6000_attach);
 
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
index 698866c..c5818a1 100644 (file)
@@ -557,7 +557,7 @@ static void stb6100_release(struct dvb_frontend *fe)
        kfree(state);
 }
 
-EXPORT_SYMBOL(stb6100_attach);
+EXPORT_SYMBOL_GPL(stb6100_attach);
 MODULE_PARM_DESC(verbose, "Set Verbosity level");
 
 MODULE_AUTHOR("Manu Abraham");
index 3ae1f3a..a5581bd 100644 (file)
@@ -590,7 +590,7 @@ error:
 
        return NULL;
 }
-EXPORT_SYMBOL(stv0288_attach);
+EXPORT_SYMBOL_GPL(stv0288_attach);
 
 module_param(debug_legacy_dish_switch, int, 0444);
 MODULE_PARM_DESC(debug_legacy_dish_switch,
index 6d5962d..9d4dbd9 100644 (file)
@@ -710,4 +710,4 @@ MODULE_DESCRIPTION("ST STV0297 DVB-C Demodulator driver");
 MODULE_AUTHOR("Dennis Noermann and Andrew de Quincey");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(stv0297_attach);
+EXPORT_SYMBOL_GPL(stv0297_attach);
index b5263a0..da7ff2c 100644 (file)
@@ -752,4 +752,4 @@ MODULE_DESCRIPTION("ST STV0299 DVB Demodulator driver");
 MODULE_AUTHOR("Ralph Metzler, Holger Waechtler, Peter Schildmann, Felix Domke, Andreas Oberritter, Andrew de Quincey, Kenneth Aafly");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(stv0299_attach);
+EXPORT_SYMBOL_GPL(stv0299_attach);
index a93f406..4832643 100644 (file)
@@ -1750,7 +1750,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(stv0367ter_attach);
+EXPORT_SYMBOL_GPL(stv0367ter_attach);
 
 static int stv0367cab_gate_ctrl(struct dvb_frontend *fe, int enable)
 {
@@ -2919,7 +2919,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(stv0367cab_attach);
+EXPORT_SYMBOL_GPL(stv0367cab_attach);
 
 /*
  * Functions for operation on Digital Devices hardware
@@ -3340,7 +3340,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(stv0367ddb_attach);
+EXPORT_SYMBOL_GPL(stv0367ddb_attach);
 
 MODULE_PARM_DESC(debug, "Set debug");
 MODULE_PARM_DESC(i2c_debug, "Set i2c debug");
index 212312d..e7b9b9b 100644 (file)
@@ -1957,7 +1957,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(stv0900_attach);
+EXPORT_SYMBOL_GPL(stv0900_attach);
 
 MODULE_PARM_DESC(debug, "Set debug");
 
index a07dc5f..cc45139 100644 (file)
@@ -5071,7 +5071,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(stv090x_attach);
+EXPORT_SYMBOL_GPL(stv090x_attach);
 
 static const struct i2c_device_id stv090x_id_table[] = {
        {"stv090x", 0},
index 963f6a8..1cf9c09 100644 (file)
@@ -427,7 +427,7 @@ struct dvb_frontend *stv6110_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(stv6110_attach);
+EXPORT_SYMBOL_GPL(stv6110_attach);
 
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
index 11653f8..c678f47 100644 (file)
@@ -467,7 +467,7 @@ const struct stv6110x_devctl *stv6110x_attach(struct dvb_frontend *fe,
        dev_info(&stv6110x->i2c->dev, "Attaching STV6110x\n");
        return stv6110x->devctl;
 }
-EXPORT_SYMBOL(stv6110x_attach);
+EXPORT_SYMBOL_GPL(stv6110x_attach);
 
 static const struct i2c_device_id stv6110x_id_table[] = {
        {"stv6110x", 0},
index faa6e54..462e12a 100644 (file)
@@ -523,4 +523,4 @@ MODULE_DESCRIPTION("Philips TDA10021 DVB-C demodulator driver");
 MODULE_AUTHOR("Ralph Metzler, Holger Waechtler, Markus Schulz");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(tda10021_attach);
+EXPORT_SYMBOL_GPL(tda10021_attach);
index 8f32edf..4c2541e 100644 (file)
@@ -594,4 +594,4 @@ MODULE_DESCRIPTION("Philips TDA10023 DVB-C demodulator driver");
 MODULE_AUTHOR("Georg Acher, Hartmut Birr");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(tda10023_attach);
+EXPORT_SYMBOL_GPL(tda10023_attach);
index 3cb4e52..5d5e4e9 100644 (file)
@@ -1138,7 +1138,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(tda10048_attach);
+EXPORT_SYMBOL_GPL(tda10048_attach);
 
 static const struct dvb_frontend_ops tda10048_ops = {
        .delsys = { SYS_DVBT },
index 83a798c..6f306db 100644 (file)
@@ -1378,5 +1378,5 @@ MODULE_DESCRIPTION("Philips TDA10045H & TDA10046H DVB-T Demodulator");
 MODULE_AUTHOR("Andrew de Quincey & Robert Schlabbach");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(tda10045_attach);
-EXPORT_SYMBOL(tda10046_attach);
+EXPORT_SYMBOL_GPL(tda10045_attach);
+EXPORT_SYMBOL_GPL(tda10046_attach);
index cdcf976..b449514 100644 (file)
@@ -764,4 +764,4 @@ MODULE_DESCRIPTION("Philips TDA10086 DVB-S Demodulator");
 MODULE_AUTHOR("Andrew de Quincey");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(tda10086_attach);
+EXPORT_SYMBOL_GPL(tda10086_attach);
index 13e8969..346be50 100644 (file)
@@ -227,7 +227,7 @@ struct dvb_frontend *tda665x_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(tda665x_attach);
+EXPORT_SYMBOL_GPL(tda665x_attach);
 
 MODULE_DESCRIPTION("TDA665x driver");
 MODULE_AUTHOR("Manu Abraham");
index e3e1c3d..44f5362 100644 (file)
@@ -481,4 +481,4 @@ MODULE_DESCRIPTION("Philips TDA8083 DVB-S Demodulator");
 MODULE_AUTHOR("Ralph Metzler, Holger Waechtler");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(tda8083_attach);
+EXPORT_SYMBOL_GPL(tda8083_attach);
index 0d576d4..8b06f92 100644 (file)
@@ -188,7 +188,7 @@ exit:
        return NULL;
 }
 
-EXPORT_SYMBOL(tda8261_attach);
+EXPORT_SYMBOL_GPL(tda8261_attach);
 
 MODULE_AUTHOR("Manu Abraham");
 MODULE_DESCRIPTION("TDA8261 8PSK/QPSK Tuner");
index f9703a1..eafcf5f 100644 (file)
@@ -164,7 +164,7 @@ struct dvb_frontend *tda826x_attach(struct dvb_frontend *fe, int addr, struct i2
 
        return fe;
 }
-EXPORT_SYMBOL(tda826x_attach);
+EXPORT_SYMBOL_GPL(tda826x_attach);
 
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
index f5b60f8..a5ebce5 100644 (file)
@@ -525,7 +525,7 @@ struct dvb_frontend *ts2020_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(ts2020_attach);
+EXPORT_SYMBOL_GPL(ts2020_attach);
 
 /*
  * We implement own regmap locking due to legacy DVB attach which uses frontend
index 2483f61..41dd9b6 100644 (file)
@@ -186,7 +186,7 @@ struct dvb_frontend *tua6100_attach(struct dvb_frontend *fe, int addr, struct i2
        fe->tuner_priv = priv;
        return fe;
 }
-EXPORT_SYMBOL(tua6100_attach);
+EXPORT_SYMBOL_GPL(tua6100_attach);
 
 MODULE_DESCRIPTION("DVB tua6100 driver");
 MODULE_AUTHOR("Andrew de Quincey");
index 9df14d0..ee5620e 100644 (file)
@@ -434,4 +434,4 @@ MODULE_DESCRIPTION("VLSI VES1820 DVB-C Demodulator driver");
 MODULE_AUTHOR("Ralph Metzler, Holger Waechtler");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(ves1820_attach);
+EXPORT_SYMBOL_GPL(ves1820_attach);
index b747272..c60e21d 100644 (file)
@@ -540,4 +540,4 @@ MODULE_DESCRIPTION("VLSI VES1x93 DVB-S Demodulator driver");
 MODULE_AUTHOR("Ralph Metzler");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(ves1x93_attach);
+EXPORT_SYMBOL_GPL(ves1x93_attach);
index d392c7c..7ba575e 100644 (file)
@@ -496,7 +496,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(zl10036_attach);
+EXPORT_SYMBOL_GPL(zl10036_attach);
 
 module_param_named(debug, zl10036_debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
index 1335bf7..a3e4d21 100644 (file)
@@ -295,7 +295,7 @@ error:
        kfree(state);
        return NULL;
 }
-EXPORT_SYMBOL(zl10039_attach);
+EXPORT_SYMBOL_GPL(zl10039_attach);
 
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off frontend debugging (default:off).");
index 2a2cf20..8849d05 100644 (file)
@@ -665,4 +665,4 @@ MODULE_DESCRIPTION("Zarlink ZL10353 DVB-T demodulator driver");
 MODULE_AUTHOR("Chris Pascoe");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(zl10353_attach);
+EXPORT_SYMBOL_GPL(zl10353_attach);
index 3e52a51..110651e 100644 (file)
@@ -1722,7 +1722,7 @@ struct dst_state *dst_attach(struct dst_state *state, struct dvb_adapter *dvb_ad
        return state;                           /*      Manu (DST is a card not a frontend)     */
 }
 
-EXPORT_SYMBOL(dst_attach);
+EXPORT_SYMBOL_GPL(dst_attach);
 
 static const struct dvb_frontend_ops dst_dvbt_ops = {
        .delsys = { SYS_DVBT },
index d234a0f..a9cc6e7 100644 (file)
@@ -668,7 +668,7 @@ struct dvb_device *dst_ca_attach(struct dst_state *dst, struct dvb_adapter *dvb_
        return NULL;
 }
 
-EXPORT_SYMBOL(dst_ca_attach);
+EXPORT_SYMBOL_GPL(dst_ca_attach);
 
 MODULE_DESCRIPTION("DST DVB-S/T/C Combo CA driver");
 MODULE_AUTHOR("Manu Abraham");
index 6868a0c..520ebd1 100644 (file)
@@ -112,7 +112,7 @@ struct dvb_frontend *ddbridge_dummy_fe_qam_attach(void)
        state->frontend.demodulator_priv = state;
        return &state->frontend;
 }
-EXPORT_SYMBOL(ddbridge_dummy_fe_qam_attach);
+EXPORT_SYMBOL_GPL(ddbridge_dummy_fe_qam_attach);
 
 static const struct dvb_frontend_ops ddbridge_dummy_fe_qam_ops = {
        .delsys = { SYS_DVBC_ANNEX_A },
index eaa3bbc..3d3b54b 100644 (file)
@@ -499,7 +499,7 @@ struct dvb_frontend *fc0011_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(fc0011_attach);
+EXPORT_SYMBOL_GPL(fc0011_attach);
 
 MODULE_DESCRIPTION("Fitipower FC0011 silicon tuner driver");
 MODULE_AUTHOR("Michael Buesch <m@bues.ch>");
index 4429d5e..81e65ac 100644 (file)
@@ -495,7 +495,7 @@ err:
 
        return fe;
 }
-EXPORT_SYMBOL(fc0012_attach);
+EXPORT_SYMBOL_GPL(fc0012_attach);
 
 MODULE_DESCRIPTION("Fitipower FC0012 silicon tuner driver");
 MODULE_AUTHOR("Hans-Frieder Vogt <hfvogt@gmx.net>");
index 29dd9b5..1006a27 100644 (file)
@@ -608,7 +608,7 @@ struct dvb_frontend *fc0013_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(fc0013_attach);
+EXPORT_SYMBOL_GPL(fc0013_attach);
 
 MODULE_DESCRIPTION("Fitipower FC0013 silicon tuner driver");
 MODULE_AUTHOR("Hans-Frieder Vogt <hfvogt@gmx.net>");
index 1c746be..1575ab9 100644 (file)
@@ -410,7 +410,7 @@ struct dvb_frontend *max2165_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(max2165_attach);
+EXPORT_SYMBOL_GPL(max2165_attach);
 
 MODULE_AUTHOR("David T. L. Wong <davidtlwong@gmail.com>");
 MODULE_DESCRIPTION("Maxim MAX2165 silicon tuner driver");
index 0c91615..ed8bdf7 100644 (file)
@@ -356,7 +356,7 @@ error:
        kfree(priv);
        return NULL;
 }
-EXPORT_SYMBOL(mc44s803_attach);
+EXPORT_SYMBOL_GPL(mc44s803_attach);
 
 MODULE_AUTHOR("Jochen Friedrich");
 MODULE_DESCRIPTION("Freescale MC44S803 silicon tuner driver");
index 0278a9f..4205ed4 100644 (file)
@@ -440,7 +440,7 @@ struct dvb_frontend * mt2060_attach(struct dvb_frontend *fe, struct i2c_adapter
 
        return fe;
 }
-EXPORT_SYMBOL(mt2060_attach);
+EXPORT_SYMBOL_GPL(mt2060_attach);
 
 static int mt2060_probe(struct i2c_client *client)
 {
index 37f50ff..eebc060 100644 (file)
@@ -274,7 +274,7 @@ struct dvb_frontend * mt2131_attach(struct dvb_frontend *fe,
        fe->tuner_priv = priv;
        return fe;
 }
-EXPORT_SYMBOL(mt2131_attach);
+EXPORT_SYMBOL_GPL(mt2131_attach);
 
 MODULE_AUTHOR("Steven Toth");
 MODULE_DESCRIPTION("Microtune MT2131 silicon tuner driver");
index 6136f20..2e92885 100644 (file)
@@ -336,7 +336,7 @@ struct dvb_frontend * mt2266_attach(struct dvb_frontend *fe, struct i2c_adapter
        mt2266_calibrate(priv);
        return fe;
 }
-EXPORT_SYMBOL(mt2266_attach);
+EXPORT_SYMBOL_GPL(mt2266_attach);
 
 MODULE_AUTHOR("Olivier DANET");
 MODULE_DESCRIPTION("Microtune MT2266 silicon tuner driver");
index 06dfab9..d9bfa25 100644 (file)
@@ -4120,7 +4120,7 @@ struct dvb_frontend *mxl5005s_attach(struct dvb_frontend *fe,
        fe->tuner_priv = state;
        return fe;
 }
-EXPORT_SYMBOL(mxl5005s_attach);
+EXPORT_SYMBOL_GPL(mxl5005s_attach);
 
 MODULE_DESCRIPTION("MaxLinear MXL5005S silicon tuner driver");
 MODULE_AUTHOR("Steven Toth");
index a7b1986..48fc79c 100644 (file)
@@ -441,7 +441,7 @@ struct dvb_frontend * qt1010_attach(struct dvb_frontend *fe,
        fe->tuner_priv = priv;
        return fe;
 }
-EXPORT_SYMBOL(qt1010_attach);
+EXPORT_SYMBOL_GPL(qt1010_attach);
 
 MODULE_DESCRIPTION("Quantek QT1010 silicon tuner driver");
 MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>");
index 4ed9464..7d8d84d 100644 (file)
@@ -336,7 +336,7 @@ struct dvb_frontend *tda18218_attach(struct dvb_frontend *fe,
 
        return fe;
 }
-EXPORT_SYMBOL(tda18218_attach);
+EXPORT_SYMBOL_GPL(tda18218_attach);
 
 MODULE_DESCRIPTION("NXP TDA18218HN silicon tuner driver");
 MODULE_AUTHOR("Antti Palosaari <crope@iki.fi>");
index 69c2e1b..5a967ed 100644 (file)
@@ -1512,7 +1512,7 @@ fail:
        return NULL;
 }
 
-EXPORT_SYMBOL(xc2028_attach);
+EXPORT_SYMBOL_GPL(xc2028_attach);
 
 MODULE_DESCRIPTION("Xceive xc2028/xc3028 tuner driver");
 MODULE_AUTHOR("Michel Ludwig <michel.ludwig@gmail.com>");
index d59b4ab..57ded9f 100644 (file)
@@ -1742,7 +1742,7 @@ fail2:
        xc4000_release(fe);
        return NULL;
 }
-EXPORT_SYMBOL(xc4000_attach);
+EXPORT_SYMBOL_GPL(xc4000_attach);
 
 MODULE_AUTHOR("Steven Toth, Davide Ferri");
 MODULE_DESCRIPTION("Xceive xc4000 silicon tuner driver");
index 7b7d9fe..2182e5b 100644 (file)
@@ -1460,7 +1460,7 @@ fail:
        xc5000_release(fe);
        return NULL;
 }
-EXPORT_SYMBOL(xc5000_attach);
+EXPORT_SYMBOL_GPL(xc5000_attach);
 
 MODULE_AUTHOR("Steven Toth");
 MODULE_DESCRIPTION("Xceive xc5000 silicon tuner driver");
index 85be645..aea9574 100644 (file)
@@ -1220,7 +1220,7 @@ config MFD_RC5T583
          different functionality of the device.
 
 config MFD_RK8XX
-       bool
+       tristate
        select MFD_CORE
 
 config MFD_RK8XX_I2C
@@ -1371,8 +1371,9 @@ config MFD_SC27XX_PMIC
          and it also adds the irq_chip parts for handling the PMIC chip events.
 
 config RZ_MTU3
-       bool "Renesas RZ/G2L MTU3a core driver"
+       tristate "Renesas RZ/G2L MTU3a core driver"
        depends on (ARCH_RZG2L && OF) || COMPILE_TEST
+       select MFD_CORE
        help
          Select this option to enable Renesas RZ/G2L MTU3a core driver for
          the Multi-Function Timer Pulse Unit 3 (MTU3a) hardware available
index 9d9e978..15c9582 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/mfd/abx500/ab8500.h>
 #include <linux/mfd/dbx500-prcmu.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 /*
  * Interrupt register offsets
index feb757e..7940583 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/i2c.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/reboot.h>
 #include <linux/regmap.h>
 
index 2406fcd..4e32ac3 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/i2c.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 
 static const struct mfd_cell act8945a_devs[] = {
@@ -68,7 +68,7 @@ MODULE_DEVICE_TABLE(of, act8945a_of_match);
 static struct i2c_driver act8945a_i2c_driver = {
        .driver = {
                   .name = "act8945a",
-                  .of_match_table = of_match_ptr(act8945a_of_match),
+                  .of_match_table = act8945a_of_match,
        },
        .probe = act8945a_i2c_probe,
        .id_table = act8945a_i2c_id,
index 34ef526..d53e433 100644 (file)
@@ -163,7 +163,7 @@ static struct spi_driver altr_a10sr_spi_driver = {
        .probe = altr_a10sr_spi_probe,
        .driver = {
                .name = "altr_a10sr",
-               .of_match_table = of_match_ptr(altr_a10sr_spi_of_match),
+               .of_match_table = altr_a10sr_spi_of_match,
        },
        .id_table = altr_a10sr_spi_ids,
 };
index af20581..0e52bd2 100644 (file)
@@ -14,8 +14,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
index c166fcd..19a0adf 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
index 7c5de3a..67473b5 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 
 #define ATC260X_CHIP_REV_MAX   31
index 3c2414b..20de7f4 100644 (file)
@@ -83,7 +83,6 @@ static int atmel_hlcdc_probe(struct platform_device *pdev)
        struct atmel_hlcdc_regmap *hregmap;
        struct device *dev = &pdev->dev;
        struct atmel_hlcdc *hlcdc;
-       struct resource *res;
 
        hregmap = devm_kzalloc(dev, sizeof(*hregmap), GFP_KERNEL);
        if (!hregmap)
@@ -93,8 +92,7 @@ static int atmel_hlcdc_probe(struct platform_device *pdev)
        if (!hlcdc)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       hregmap->regs = devm_ioremap_resource(dev, res);
+       hregmap->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(hregmap->regs))
                return PTR_ERR(hregmap->regs);
 
index c03bc5c..87603ee 100644 (file)
@@ -342,7 +342,7 @@ static const struct regmap_config axp152_regmap_config = {
        .wr_table       = &axp152_writeable_table,
        .volatile_table = &axp152_volatile_table,
        .max_register   = AXP152_PWM1_DUTY_CYCLE,
-       .cache_type     = REGCACHE_RBTREE,
+       .cache_type     = REGCACHE_MAPLE,
 };
 
 static const struct regmap_config axp192_regmap_config = {
@@ -360,7 +360,7 @@ static const struct regmap_config axp20x_regmap_config = {
        .wr_table       = &axp20x_writeable_table,
        .volatile_table = &axp20x_volatile_table,
        .max_register   = AXP20X_OCV(AXP20X_OCV_MAX),
-       .cache_type     = REGCACHE_RBTREE,
+       .cache_type     = REGCACHE_MAPLE,
 };
 
 static const struct regmap_config axp22x_regmap_config = {
@@ -369,7 +369,7 @@ static const struct regmap_config axp22x_regmap_config = {
        .wr_table       = &axp22x_writeable_table,
        .volatile_table = &axp22x_volatile_table,
        .max_register   = AXP22X_BATLOW_THRES1,
-       .cache_type     = REGCACHE_RBTREE,
+       .cache_type     = REGCACHE_MAPLE,
 };
 
 static const struct regmap_config axp288_regmap_config = {
@@ -378,7 +378,7 @@ static const struct regmap_config axp288_regmap_config = {
        .wr_table       = &axp288_writeable_table,
        .volatile_table = &axp288_volatile_table,
        .max_register   = AXP288_FG_TUNE5,
-       .cache_type     = REGCACHE_RBTREE,
+       .cache_type     = REGCACHE_MAPLE,
 };
 
 static const struct regmap_config axp313a_regmap_config = {
@@ -396,7 +396,7 @@ static const struct regmap_config axp806_regmap_config = {
        .wr_table       = &axp806_writeable_table,
        .volatile_table = &axp806_volatile_table,
        .max_register   = AXP806_REG_ADDR_EXT,
-       .cache_type     = REGCACHE_RBTREE,
+       .cache_type     = REGCACHE_MAPLE,
 };
 
 static const struct regmap_config axp15060_regmap_config = {
@@ -405,7 +405,7 @@ static const struct regmap_config axp15060_regmap_config = {
        .wr_table       = &axp15060_writeable_table,
        .volatile_table = &axp15060_volatile_table,
        .max_register   = AXP15060_IRQ2_STATE,
-       .cache_type     = REGCACHE_RBTREE,
+       .cache_type     = REGCACHE_MAPLE,
 };
 
 #define INIT_REGMAP_IRQ(_variant, _irq, _off, _mask)                   \
index 9f39b46..92eede9 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
index 92f4dfc..79d393b 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/cros_ec_chardev.h>
 #include <linux/platform_data/cros_ec_commands.h>
index 3c77f0a..59b005c 100644 (file)
@@ -1249,7 +1249,7 @@ const struct regmap_config cs47l15_16bit_spi_regmap = {
        .readable_reg = &cs47l15_16bit_readable_register,
        .volatile_reg = &cs47l15_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l15_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l15_reg_default),
 };
@@ -1264,7 +1264,7 @@ const struct regmap_config cs47l15_16bit_i2c_regmap = {
        .readable_reg = &cs47l15_16bit_readable_register,
        .volatile_reg = &cs47l15_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l15_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l15_reg_default),
 };
@@ -1281,7 +1281,7 @@ const struct regmap_config cs47l15_32bit_spi_regmap = {
        .readable_reg = &cs47l15_32bit_readable_register,
        .volatile_reg = &cs47l15_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l15_32bit_spi_regmap);
 
@@ -1295,6 +1295,6 @@ const struct regmap_config cs47l15_32bit_i2c_regmap = {
        .readable_reg = &cs47l15_32bit_readable_register,
        .volatile_reg = &cs47l15_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l15_32bit_i2c_regmap);
index c289d92..878dfd2 100644 (file)
@@ -1616,7 +1616,7 @@ const struct regmap_config cs47l24_spi_regmap = {
        .readable_reg = cs47l24_readable_register,
        .volatile_reg = cs47l24_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l24_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l24_reg_default),
 };
index a0bc6c5..274f4b0 100644 (file)
@@ -1498,7 +1498,7 @@ const struct regmap_config cs47l35_16bit_spi_regmap = {
        .readable_reg = cs47l35_16bit_readable_register,
        .volatile_reg = cs47l35_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l35_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l35_reg_default),
 };
@@ -1515,7 +1515,7 @@ const struct regmap_config cs47l35_16bit_i2c_regmap = {
        .readable_reg = cs47l35_16bit_readable_register,
        .volatile_reg = cs47l35_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l35_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l35_reg_default),
 };
@@ -1534,7 +1534,7 @@ const struct regmap_config cs47l35_32bit_spi_regmap = {
        .readable_reg = cs47l35_32bit_readable_register,
        .volatile_reg = cs47l35_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l35_32bit_spi_regmap);
 
@@ -1550,6 +1550,6 @@ const struct regmap_config cs47l35_32bit_i2c_regmap = {
        .readable_reg = cs47l35_32bit_readable_register,
        .volatile_reg = cs47l35_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l35_32bit_i2c_regmap);
index 270d8ed..f397894 100644 (file)
@@ -2836,7 +2836,7 @@ const struct regmap_config cs47l85_16bit_spi_regmap = {
        .readable_reg = cs47l85_16bit_readable_register,
        .volatile_reg = cs47l85_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l85_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l85_reg_default),
 };
@@ -2853,7 +2853,7 @@ const struct regmap_config cs47l85_16bit_i2c_regmap = {
        .readable_reg = cs47l85_16bit_readable_register,
        .volatile_reg = cs47l85_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l85_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l85_reg_default),
 };
@@ -2872,7 +2872,7 @@ const struct regmap_config cs47l85_32bit_spi_regmap = {
        .readable_reg = cs47l85_32bit_readable_register,
        .volatile_reg = cs47l85_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l85_32bit_spi_regmap);
 
@@ -2888,6 +2888,6 @@ const struct regmap_config cs47l85_32bit_i2c_regmap = {
        .readable_reg = cs47l85_32bit_readable_register,
        .volatile_reg = cs47l85_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l85_32bit_i2c_regmap);
index 7345fc0..6f9ceb3 100644 (file)
@@ -2539,7 +2539,7 @@ const struct regmap_config cs47l90_16bit_spi_regmap = {
        .readable_reg = cs47l90_16bit_readable_register,
        .volatile_reg = cs47l90_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l90_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l90_reg_default),
 };
@@ -2556,7 +2556,7 @@ const struct regmap_config cs47l90_16bit_i2c_regmap = {
        .readable_reg = cs47l90_16bit_readable_register,
        .volatile_reg = cs47l90_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l90_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l90_reg_default),
 };
@@ -2575,7 +2575,7 @@ const struct regmap_config cs47l90_32bit_spi_regmap = {
        .readable_reg = cs47l90_32bit_readable_register,
        .volatile_reg = cs47l90_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l90_32bit_spi_regmap);
 
@@ -2591,6 +2591,6 @@ const struct regmap_config cs47l90_32bit_i2c_regmap = {
        .readable_reg = cs47l90_32bit_readable_register,
        .volatile_reg = cs47l90_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l90_32bit_i2c_regmap);
index f296e35..4d9ba86 100644 (file)
@@ -1890,7 +1890,7 @@ const struct regmap_config cs47l92_16bit_spi_regmap = {
        .readable_reg = &cs47l92_16bit_readable_register,
        .volatile_reg = &cs47l92_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l92_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l92_reg_default),
 };
@@ -1907,7 +1907,7 @@ const struct regmap_config cs47l92_16bit_i2c_regmap = {
        .readable_reg = &cs47l92_16bit_readable_register,
        .volatile_reg = &cs47l92_16bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = cs47l92_reg_default,
        .num_reg_defaults = ARRAY_SIZE(cs47l92_reg_default),
 };
@@ -1926,7 +1926,7 @@ const struct regmap_config cs47l92_32bit_spi_regmap = {
        .readable_reg = &cs47l92_32bit_readable_register,
        .volatile_reg = &cs47l92_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l92_32bit_spi_regmap);
 
@@ -1942,6 +1942,6 @@ const struct regmap_config cs47l92_32bit_i2c_regmap = {
        .readable_reg = &cs47l92_32bit_readable_register,
        .volatile_reg = &cs47l92_32bit_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 EXPORT_SYMBOL_GPL(cs47l92_32bit_i2c_regmap);
index 541e2d4..fd000a2 100644 (file)
 #include <linux/mfd/core.h>
 #include <linux/i2c.h>
 #include <linux/err.h>
+#include <linux/of.h>
 
 #include <linux/mfd/da9052/da9052.h>
 #include <linux/mfd/da9052/reg.h>
 
-#ifdef CONFIG_OF
-#include <linux/of.h>
-#include <linux/of_device.h>
-#endif
 
 /* I2C safe register check */
 static inline bool i2c_safe_reg(unsigned char reg)
index bbaf4f0..9a5f51b 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/i2c.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #include <linux/mfd/da9055/core.h>
 
index 48f58b6..45da007 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/interrupt.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/irq.h>
 #include <linux/mfd/core.h>
index 166cd21..1506d8d 100644 (file)
@@ -109,14 +109,12 @@ static int exynos_lpass_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct exynos_lpass *lpass;
        void __iomem *base_top;
-       struct resource *res;
 
        lpass = devm_kzalloc(dev, sizeof(*lpass), GFP_KERNEL);
        if (!lpass)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       base_top = devm_ioremap_resource(dev, res);
+       base_top = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base_top))
                return PTR_ERR(base_top);
 
index cb5cf4a..a6a8905 100644 (file)
@@ -59,7 +59,7 @@ static int hi6421_pmic_probe(struct platform_device *pdev)
        id = of_match_device(of_hi6421_pmic_match, &pdev->dev);
        if (!id)
                return -EINVAL;
-       type = (enum hi6421_type)id->data;
+       type = (uintptr_t)id->data;
 
        pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL);
        if (!pmic)
index a58e42d..8feae8d 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/mfd/hi655x-pmic.h>
 #include <linux/module.h>
 #include <linux/gpio/consumer.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
@@ -100,8 +100,7 @@ static int hi655x_pmic_probe(struct platform_device *pdev)
                return -ENOMEM;
        pmic->dev = dev;
 
-       pmic->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       base = devm_ioremap_resource(dev, pmic->res);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base))
                return PTR_ERR(base);
 
@@ -163,7 +162,7 @@ MODULE_DEVICE_TABLE(of, hi655x_pmic_match);
 static struct platform_driver hi655x_pmic_driver = {
        .driver = {
                .name = "hi655x-pmic",
-               .of_match_table = of_match_ptr(hi655x_pmic_match),
+               .of_match_table = hi655x_pmic_match,
        },
        .probe  = hi655x_pmic_probe,
        .remove = hi655x_pmic_remove,
index 6d39684..c964ea6 100644 (file)
@@ -78,8 +78,6 @@ EXPORT_SYMBOL(ipaq_micro_tx_msg);
 
 static void micro_rx_msg(struct ipaq_micro *micro, u8 id, int len, u8 *data)
 {
-       int i;
-
        dev_dbg(micro->dev, "RX msg: %02x, %d bytes\n", id, len);
 
        spin_lock(&micro->lock);
@@ -131,10 +129,7 @@ static void micro_rx_msg(struct ipaq_micro *micro, u8 id, int len, u8 *data)
                break;
        default:
                dev_err(micro->dev,
-                       "unknown msg %d [%d] ", id, len);
-               for (i = 0; i < len; ++i)
-                       pr_cont("0x%02x ", data[i]);
-               pr_cont("\n");
+                       "unknown msg %d [%d] %*ph\n", id, len, len, data);
        }
        spin_unlock(&micro->lock);
 }
index dfe9cb7..e03b4d3 100644 (file)
@@ -27,7 +27,7 @@
 #include <linux/mfd/iqs62x.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index 3c88431..59092f8 100644 (file)
@@ -379,7 +379,7 @@ static int lochnagar_i2c_probe(struct i2c_client *i2c)
 static struct i2c_driver lochnagar_i2c_driver = {
        .driver = {
                .name = "lochnagar",
-               .of_match_table = of_match_ptr(lochnagar_of_match),
+               .of_match_table = lochnagar_of_match,
                .suppress_bind_attrs = true,
        },
        .probe = lochnagar_i2c_probe,
index 6639f0f..de7ab7a 100644 (file)
@@ -7,8 +7,8 @@
 
 #include <linux/interrupt.h>
 #include <linux/mfd/core.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 
 #include <linux/mfd/lp873x.h>
index 88ce4d7..1b7f834 100644 (file)
@@ -92,7 +92,7 @@ static int lp87565_probe(struct i2c_client *client)
 
        of_id = of_match_device(of_lp87565_match_table, &client->dev);
        if (of_id)
-               lp87565->dev_type = (enum lp87565_device_type)of_id->data;
+               lp87565->dev_type = (uintptr_t)of_id->data;
 
        i2c_set_clientdata(client, lp87565);
 
index 0968aa9..a404ea2 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/i2c.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 
 #include <linux/mfd/madera/core.h>
index da84eb5..ad07ebe 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/spi/spi.h>
 
index 25ed884..1f4f500 100644 (file)
@@ -402,8 +402,7 @@ static int max14577_i2c_probe(struct i2c_client *i2c)
 
                of_id = of_match_device(max14577_dt_match, &i2c->dev);
                if (of_id)
-                       max14577->dev_type =
-                               (enum maxim_device_type)of_id->data;
+                       max14577->dev_type = (uintptr_t)of_id->data;
        } else {
                max14577->dev_type = id->driver_data;
        }
index e147e94..10c2e27 100644 (file)
@@ -173,7 +173,7 @@ static int max77541_probe(struct i2c_client *client)
        i2c_set_clientdata(client, max77541);
        max77541->i2c = client;
 
-       max77541->id  = (enum max7754x_ids)device_get_match_data(dev);
+       max77541->id = (uintptr_t)device_get_match_data(dev);
        if (!max77541->id)
                max77541->id  = (enum max7754x_ids)id->driver_data;
 
index 5811ed8..e63e8e4 100644 (file)
@@ -30,7 +30,6 @@
 #include <linux/mfd/max77620.h>
 #include <linux/init.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
index 0183308..91c286c 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/mfd/max77686-private.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 static const struct mfd_cell max77686_devs[] = {
        { .name = "max77686-pmic", },
index b3689c1..fcff0c4 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/mfd/core.h>
 #include <linux/mfd/max77693-common.h>
 #include <linux/mfd/max77843-private.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 static const struct mfd_cell max77843_devs[] = {
index 78b5ee6..8bbe797 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/mfd/max8907.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
index 0246bbe..105d79b 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/mfd/core.h>
 #include <linux/mfd/max8925.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 
 static const struct resource bk_resources[] = {
        { 0x84, 0x84, "mode control", IORESOURCE_REG, },
index 94c09a5..110bef7 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
index 33a3ec5..4cc426a 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/pm_runtime.h>
 #include <linux/mutex.h>
index de59b49..6bc0e75 100644 (file)
@@ -53,7 +53,6 @@ static const struct regmap_config mc13xxx_regmap_i2c_config = {
 
 static int mc13xxx_i2c_probe(struct i2c_client *client)
 {
-       const struct i2c_device_id *id = i2c_client_get_device_id(client);
        struct mc13xxx *mc13xxx;
        int ret;
 
@@ -73,13 +72,7 @@ static int mc13xxx_i2c_probe(struct i2c_client *client)
                return ret;
        }
 
-       if (client->dev.of_node) {
-               const struct of_device_id *of_id =
-                       of_match_device(mc13xxx_dt_ids, &client->dev);
-               mc13xxx->variant = of_id->data;
-       } else {
-               mc13xxx->variant = (void *)id->driver_data;
-       }
+       mc13xxx->variant = i2c_get_match_data(client);
 
        return mc13xxx_common_init(&client->dev);
 }
index 3897564..49830b5 100644 (file)
@@ -3,6 +3,8 @@
 // Copyright (c) 2020 MediaTek Inc.
 
 #include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/mfd/mt6357/core.h>
 #include <linux/mfd/mt6357/registers.h>
 #include <linux/mfd/mt6358/core.h>
@@ -11,9 +13,6 @@
 #include <linux/mfd/mt6359/registers.h>
 #include <linux/mfd/mt6397/core.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
index f6c1f80..4449dde 100644 (file)
@@ -6,9 +6,10 @@
 
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
+#include <linux/irqdomain.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/mt6323/core.h>
index 72f923e..886745b 100644 (file)
@@ -3,10 +3,9 @@
 // Copyright (c) 2019 MediaTek Inc.
 
 #include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/suspend.h>
index 111d11f..21f3033 100644 (file)
@@ -142,7 +142,7 @@ static int mxs_lradc_probe(struct platform_device *pdev)
        if (!of_id)
                return -EINVAL;
 
-       lradc->soc = (enum mxs_lradc_id)of_id->data;
+       lradc->soc = (uintptr_t)of_id->data;
 
        lradc->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(lradc->clk)) {
index 7f57751..78f1bb5 100644 (file)
@@ -534,7 +534,6 @@ static int usbhs_omap_probe(struct platform_device *pdev)
        struct device                   *dev =  &pdev->dev;
        struct usbhs_omap_platform_data *pdata = dev_get_platdata(dev);
        struct usbhs_hcd_omap           *omap;
-       struct resource                 *res;
        int                             ret = 0;
        int                             i;
        bool                            need_logic_fck;
@@ -569,8 +568,7 @@ static int usbhs_omap_probe(struct platform_device *pdev)
                return -ENOMEM;
        }
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       omap->uhh_base = devm_ioremap_resource(dev, res);
+       omap->uhh_base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(omap->uhh_base))
                return PTR_ERR(omap->uhh_base);
 
index 69cbc20..9063537 100644 (file)
@@ -200,15 +200,13 @@ static unsigned ohci_omap3_fslsmode(enum usbhs_omap_port_mode mode)
 static int usbtll_omap_probe(struct platform_device *pdev)
 {
        struct device                           *dev =  &pdev->dev;
-       struct resource                         *res;
        struct usbtll_omap                      *tll;
        void __iomem                            *base;
        int                                     i, nch, ver;
 
        dev_dbg(dev, "starting TI HSUSB TLL Controller\n");
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       base = devm_ioremap_resource(dev, res);
+       base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(base))
                return PTR_ERR(base);
 
index a36f124..6e562ba 100644 (file)
@@ -18,7 +18,8 @@
 #include <linux/err.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/palmas.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 
 static const struct regmap_config palmas_regmap_config[PALMAS_NUM_CLIENTS] = {
        {
index 94a8cca..3ac3742 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/regmap.h>
index 9a948df..07c531b 100644 (file)
@@ -103,8 +103,9 @@ static int
 pm8xxx_config_irq(struct pm_irq_chip *chip, unsigned int bp, unsigned int cp)
 {
        int     rc;
+       unsigned long flags;
 
-       spin_lock(&chip->pm_irq_lock);
+       spin_lock_irqsave(&chip->pm_irq_lock, flags);
        rc = regmap_write(chip->regmap, SSBI_REG_ADDR_IRQ_BLK_SEL, bp);
        if (rc) {
                pr_err("Failed Selecting Block %d rc=%d\n", bp, rc);
@@ -116,7 +117,7 @@ pm8xxx_config_irq(struct pm_irq_chip *chip, unsigned int bp, unsigned int cp)
        if (rc)
                pr_err("Failed Configuring IRQ rc=%d\n", rc);
 bail:
-       spin_unlock(&chip->pm_irq_lock);
+       spin_unlock_irqrestore(&chip->pm_irq_lock, flags);
        return rc;
 }
 
@@ -321,6 +322,7 @@ static int pm8xxx_irq_get_irqchip_state(struct irq_data *d,
        struct pm_irq_chip *chip = irq_data_get_irq_chip_data(d);
        unsigned int pmirq = irqd_to_hwirq(d);
        unsigned int bits;
+       unsigned long flags;
        int irq_bit;
        u8 block;
        int rc;
@@ -331,7 +333,7 @@ static int pm8xxx_irq_get_irqchip_state(struct irq_data *d,
        block = pmirq / 8;
        irq_bit = pmirq % 8;
 
-       spin_lock(&chip->pm_irq_lock);
+       spin_lock_irqsave(&chip->pm_irq_lock, flags);
        rc = regmap_write(chip->regmap, SSBI_REG_ADDR_IRQ_BLK_SEL, block);
        if (rc) {
                pr_err("Failed Selecting Block %d rc=%d\n", block, rc);
@@ -346,7 +348,7 @@ static int pm8xxx_irq_get_irqchip_state(struct irq_data *d,
 
        *state = !!(bits & BIT(irq_bit));
 bail:
-       spin_unlock(&chip->pm_irq_lock);
+       spin_unlock_irqrestore(&chip->pm_irq_lock, flags);
 
        return rc;
 }
index 545196c..da50eba 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/mfd/rave-sp.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/of_platform.h>
 #include <linux/sched.h>
 #include <linux/serdev.h>
 #include <asm/unaligned.h>
index e8fc9e2..11a831e 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/mfd/rk808.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/reboot.h>
 
index 333fef8..0fe616b 100644 (file)
@@ -277,7 +277,7 @@ static SIMPLE_DEV_PM_OPS(rn5t618_i2c_dev_pm_ops,
 static struct i2c_driver rn5t618_i2c_driver = {
        .driver = {
                .name = "rn5t618",
-               .of_match_table = of_match_ptr(rn5t618_of_match),
+               .of_match_table = rn5t618_of_match,
                .pm = &rn5t618_i2c_dev_pm_ops,
        },
        .probe = rn5t618_i2c_probe,
index 93d80a7..594718f 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/mfd/rohm-bd71828.h>
 #include <linux/mfd/rohm-generic.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/types.h>
 
index 0b58ecc..4798bdf 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/mfd/rohm-bd718x7.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/types.h>
 
index 6456733..bceac70 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/mfd/rohm-bd957x.h>
 #include <linux/mfd/rohm-generic.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/types.h>
 
index 26972a5..06d78a1 100644 (file)
@@ -277,7 +277,7 @@ MODULE_DEVICE_TABLE(of, rsmu_i2c_of_match);
 static struct i2c_driver rsmu_i2c_driver = {
        .driver = {
                .name = "rsmu-i2c",
-               .of_match_table = of_match_ptr(rsmu_i2c_of_match),
+               .of_match_table = rsmu_i2c_of_match,
        },
        .probe = rsmu_i2c_probe,
        .remove = rsmu_i2c_remove,
index a4a595b..ca0a120 100644 (file)
@@ -262,7 +262,7 @@ MODULE_DEVICE_TABLE(of, rsmu_spi_of_match);
 static struct spi_driver rsmu_spi_driver = {
        .driver = {
                .name = "rsmu-spi",
-               .of_match_table = of_match_ptr(rsmu_spi_of_match),
+               .of_match_table = rsmu_spi_of_match,
        },
        .probe = rsmu_spi_probe,
        .remove = rsmu_spi_remove,
index 67b0a22..7e23ab3 100644 (file)
@@ -10,9 +10,9 @@
  */
 
 #include <linux/err.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/of_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/rt5033.h>
 #include <linux/mfd/rt5033-private.h>
index 04006f4..f3dac4a 100644 (file)
@@ -11,7 +11,9 @@
 #include <linux/irq.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/rz-mtu3.h>
-#include <linux/of_platform.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/spinlock.h>
 
@@ -20,7 +22,7 @@
 struct rz_mtu3_priv {
        void __iomem *mmio;
        struct reset_control *rstc;
-       raw_spinlock_t lock;
+       spinlock_t lock;
 };
 
 /******* MTU3 registers (original offset is +0x1200) *******/
@@ -174,11 +176,11 @@ void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, u16 offset,
        struct rz_mtu3_priv *priv = mtu->priv_data;
        unsigned long tmdr, flags;
 
-       raw_spin_lock_irqsave(&priv->lock, flags);
+       spin_lock_irqsave(&priv->lock, flags);
        tmdr = rz_mtu3_shared_reg_read(ch, offset);
        __assign_bit(pos, &tmdr, !!val);
        rz_mtu3_shared_reg_write(ch, offset, tmdr);
-       raw_spin_unlock_irqrestore(&priv->lock, flags);
+       spin_unlock_irqrestore(&priv->lock, flags);
 }
 EXPORT_SYMBOL_GPL(rz_mtu3_shared_reg_update_bit);
 
@@ -250,16 +252,17 @@ static void rz_mtu3_start_stop_ch(struct rz_mtu3_channel *ch, bool start)
        u16 offset;
        u8 bitpos;
 
-       /* start stop register shared by multiple timer channels */
-       raw_spin_lock_irqsave(&priv->lock, flags);
-
        offset = rz_mtu3_get_tstr_offset(ch);
        bitpos = rz_mtu3_get_tstr_bit_pos(ch);
+
+       /* start stop register shared by multiple timer channels */
+       spin_lock_irqsave(&priv->lock, flags);
+
        tstr = rz_mtu3_shared_reg_read(ch, offset);
        __assign_bit(bitpos, &tstr, start);
        rz_mtu3_shared_reg_write(ch, offset, tstr);
 
-       raw_spin_unlock_irqrestore(&priv->lock, flags);
+       spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 bool rz_mtu3_is_enabled(struct rz_mtu3_channel *ch)
@@ -267,21 +270,18 @@ bool rz_mtu3_is_enabled(struct rz_mtu3_channel *ch)
        struct rz_mtu3 *mtu = dev_get_drvdata(ch->dev->parent);
        struct rz_mtu3_priv *priv = mtu->priv_data;
        unsigned long flags, tstr;
-       bool ret = false;
        u16 offset;
        u8 bitpos;
 
-       /* start stop register shared by multiple timer channels */
-       raw_spin_lock_irqsave(&priv->lock, flags);
-
        offset = rz_mtu3_get_tstr_offset(ch);
        bitpos = rz_mtu3_get_tstr_bit_pos(ch);
-       tstr = rz_mtu3_shared_reg_read(ch, offset);
-       ret = tstr & BIT(bitpos);
 
-       raw_spin_unlock_irqrestore(&priv->lock, flags);
+       /* start stop register shared by multiple timer channels */
+       spin_lock_irqsave(&priv->lock, flags);
+       tstr = rz_mtu3_shared_reg_read(ch, offset);
+       spin_unlock_irqrestore(&priv->lock, flags);
 
-       return ret;
+       return tstr & BIT(bitpos);
 }
 EXPORT_SYMBOL_GPL(rz_mtu3_is_enabled);
 
@@ -349,7 +349,7 @@ static int rz_mtu3_probe(struct platform_device *pdev)
                return PTR_ERR(ddata->clk);
 
        reset_control_deassert(priv->rstc);
-       raw_spin_lock_init(&priv->lock);
+       spin_lock_init(&priv->lock);
        platform_set_drvdata(pdev, ddata);
 
        for (i = 0; i < RZ_MTU_NUM_CHANNELS; i++) {
index d2f6319..a6b0d73 100644 (file)
@@ -10,8 +10,6 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/mutex.h>
index d21f32c..81e517c 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/module.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/sc27xx-pmic.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/regmap.h>
 #include <linux/spi/spi.h>
index dee89db..b0b0be4 100644 (file)
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/ssbi.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 
 /* SSBI 2.0 controller registers */
 #define SSBI2_CMD                      0x0008
index fa322f4..b2704a9 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/mfd/stm32-lptimer.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #define STM32_LPTIM_MAX_REGISTER       0x3fc
 
index 44ed2fc..732a28d 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/mfd/stm32-timers.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/reset.h>
 
 #define STM32_TIMERS_MAX_REGISTERS     0x3fc
@@ -226,8 +227,7 @@ static int stm32_timers_probe(struct platform_device *pdev)
        if (!ddata)
                return -ENOMEM;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       mmio = devm_ioremap_resource(dev, res);
+       mmio = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(mmio))
                return PTR_ERR(mmio);
 
index 1d7b401..fe018be 100644 (file)
@@ -87,7 +87,7 @@ stmpe_i2c_probe(struct i2c_client *i2c)
                dev_info(&i2c->dev, "matching on node name, compatible is preferred\n");
                partnum = id->driver_data;
        } else
-               partnum = (enum stmpe_partnum)of_id->data;
+               partnum = (uintptr_t)of_id->data;
 
        return stmpe_probe(&i2c_ci, partnum);
 }
index 3cc7492..c5128fe 100644 (file)
@@ -219,7 +219,7 @@ MODULE_DEVICE_TABLE(of, stpmic1_of_match);
 static struct i2c_driver stpmic1_driver = {
        .driver = {
                .name = "stpmic1",
-               .of_match_table = of_match_ptr(stpmic1_of_match),
+               .of_match_table = stpmic1_of_match,
                .pm = pm_sleep_ptr(&stpmic1_pm),
        },
        .probe = stpmic1_probe,
index d1cbea2..3029d48 100644 (file)
@@ -8,8 +8,8 @@
 #include <linux/kernel.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 
 #include <linux/mfd/sun4i-gpadc.h>
index 16df64e..db28eb0 100644 (file)
@@ -340,7 +340,7 @@ tc3589x_of_probe(struct device *dev, enum tc3589x_version *version)
        of_id = of_match_device(tc3589x_match, dev);
        if (!of_id)
                return ERR_PTR(-ENODEV);
-       *version = (enum tc3589x_version) of_id->data;
+       *version = (uintptr_t) of_id->data;
 
        for_each_child_of_node(np, child) {
                if (of_device_is_compatible(child, "toshiba,tc3589x-gpio"))
@@ -483,7 +483,7 @@ static struct i2c_driver tc3589x_driver = {
        .driver = {
                .name   = "tc3589x",
                .pm     = pm_sleep_ptr(&tc3589x_dev_pm_ops),
-               .of_match_table = of_match_ptr(tc3589x_match),
+               .of_match_table = tc3589x_match,
        },
        .probe          = tc3589x_probe,
        .remove         = tc3589x_remove,
index 4f06ada..cfc9f88 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/mfd/ti-lmu-register.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/slab.h>
 
 struct ti_lmu_data {
index 07825cf..b88eb70 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/mfd/core.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/sched.h>
 
 #include <linux/mfd/ti_am335x_tscadc.h>
@@ -201,8 +201,7 @@ static      int ti_tscadc_probe(struct platform_device *pdev)
        else
                tscadc->irq = err;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       tscadc->tscadc_base = devm_ioremap_resource(&pdev->dev, res);
+       tscadc->tscadc_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(tscadc->tscadc_base))
                return PTR_ERR(tscadc->tscadc_base);
 
index 9716bf7..95dafb0 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/tps6507x.h>
 
index a35ad70..9245e11 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/mfd/core.h>
 #include <linux/mfd/tps65090.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/err.h>
 
 #define NUM_INT_REG 2
index 6059929..029ecc3 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index 619bf7a..11e4e52 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/regmap.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/mutex.h>
index 15f3148..0fb9c5c 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #include <linux/mfd/core.h>
 #include <linux/mfd/tps6594.h>
index d85675a..9ce34df 100644 (file)
@@ -16,8 +16,6 @@
 #include <linux/err.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/gpio/consumer.h>
 #include <linux/delay.h>
 #include <linux/i2c.h>
index 6bba396..f77ecc6 100644 (file)
@@ -1938,7 +1938,7 @@ const struct regmap_config wm5102_i2c_regmap = {
        .readable_reg = wm5102_readable_register,
        .volatile_reg = wm5102_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = wm5102_reg_default,
        .num_reg_defaults = ARRAY_SIZE(wm5102_reg_default),
 };
index 65b9b1d..eba3248 100644 (file)
@@ -3218,7 +3218,7 @@ const struct regmap_config wm5110_i2c_regmap = {
        .readable_reg = wm5110_readable_register,
        .volatile_reg = wm5110_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = wm5110_reg_default,
        .num_reg_defaults = ARRAY_SIZE(wm5110_reg_default),
 };
index e86b6a4..e7e6892 100644 (file)
@@ -15,8 +15,7 @@
 #include <linux/mfd/core.h>
 #include <linux/slab.h>
 #include <linux/err.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 
 #include <linux/mfd/wm831x/core.h>
 #include <linux/mfd/wm831x/pdata.h>
index 997837f..694ddbb 100644 (file)
@@ -36,7 +36,7 @@ static int wm831x_i2c_probe(struct i2c_client *i2c)
                        dev_err(&i2c->dev, "Failed to match device\n");
                        return -ENODEV;
                }
-               type = (enum wm831x_parent)of_id->data;
+               type = (uintptr_t)of_id->data;
        } else {
                type = (enum wm831x_parent)id->driver_data;
        }
index 7bcddcc..76be7ef 100644 (file)
@@ -33,7 +33,7 @@ static int wm831x_spi_probe(struct spi_device *spi)
                        dev_err(&spi->dev, "Failed to match device\n");
                        return -ENODEV;
                }
-               type = (enum wm831x_parent)of_id->data;
+               type = (uintptr_t)of_id->data;
        } else {
                type = (enum wm831x_parent)id->driver_data;
        }
index 1e4f169..aba7af6 100644 (file)
@@ -628,7 +628,7 @@ static int wm8994_i2c_probe(struct i2c_client *i2c)
        if (i2c->dev.of_node) {
                of_id = of_match_device(wm8994_of_match, &i2c->dev);
                if (of_id)
-                       wm8994->type = (enum wm8994_type)of_id->data;
+                       wm8994->type = (uintptr_t)of_id->data;
        } else {
                wm8994->type = id->driver_data;
        }
index cd4fef7..ee2ed67 100644 (file)
@@ -1238,7 +1238,7 @@ struct regmap_config wm1811_regmap_config = {
        .reg_bits = 16,
        .val_bits = 16,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 
        .reg_defaults = wm1811_defaults,
        .num_reg_defaults = ARRAY_SIZE(wm1811_defaults),
@@ -1253,7 +1253,7 @@ struct regmap_config wm8994_regmap_config = {
        .reg_bits = 16,
        .val_bits = 16,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 
        .reg_defaults = wm8994_defaults,
        .num_reg_defaults = ARRAY_SIZE(wm8994_defaults),
@@ -1268,7 +1268,7 @@ struct regmap_config wm8958_regmap_config = {
        .reg_bits = 16,
        .val_bits = 16,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 
        .reg_defaults = wm8958_defaults,
        .num_reg_defaults = ARRAY_SIZE(wm8958_defaults),
index 3476787..288c57b 100644 (file)
@@ -1523,7 +1523,7 @@ const struct regmap_config wm8997_i2c_regmap = {
        .readable_reg = wm8997_readable_register,
        .volatile_reg = wm8997_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = wm8997_reg_default,
        .num_reg_defaults = ARRAY_SIZE(wm8997_reg_default),
 };
index 9b34a6d..b3e6e85 100644 (file)
@@ -1556,7 +1556,7 @@ const struct regmap_config wm8998_i2c_regmap = {
        .readable_reg = wm8998_readable_register,
        .volatile_reg = wm8998_volatile_register,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
        .reg_defaults = wm8998_reg_default,
        .num_reg_defaults = ARRAY_SIZE(wm8998_reg_default),
 };
index 153fb8d..df589d9 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/interrupt.h>
 #include <linux/reboot.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 #include <linux/mtd/map.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/cfi.h>
@@ -650,7 +649,7 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary)
 
                        /*
                         * Valid primary extension versions are: 1.0, 1.1, 1.2, 1.3, 1.4, 1.5
-                        * see: http://cs.ozerki.net/zap/pub/axim-x5/docs/cfi_r20.pdf, page 19 
+                        * see: http://cs.ozerki.net/zap/pub/axim-x5/docs/cfi_r20.pdf, page 19
                         *      http://www.spansion.com/Support/AppNotes/cfi_100_20011201.pdf
                         *      http://www.spansion.com/Support/Datasheets/s29ws-p_00_a12_e.pdf
                         *      http://www.spansion.com/Support/Datasheets/S29GL_128S_01GS_00_02_e.pdf
index 6673122..42db767 100644 (file)
@@ -2335,13 +2335,27 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port)
 {
        struct ksz_device *dev = ds->priv;
 
-       if (dev->chip_id == KSZ8830_CHIP_ID) {
+       switch (dev->chip_id) {
+       case KSZ8830_CHIP_ID:
                /* Silicon Errata Sheet (DS80000830A):
                 * Port 1 does not work with LinkMD Cable-Testing.
                 * Port 1 does not respond to received PAUSE control frames.
                 */
                if (!port)
                        return MICREL_KSZ8_P1_ERRATA;
+               break;
+       case KSZ9477_CHIP_ID:
+               /* KSZ9477 Errata DS80000754C
+                *
+                * Module 4: Energy Efficient Ethernet (EEE) feature select must
+                * be manually disabled
+                *   The EEE feature is enabled by default, but it is not fully
+                *   operational. It must be manually disabled through register
+                *   controls. If not disabled, the PHY ports can auto-negotiate
+                *   to enable EEE, and this feature can cause link drops when
+                *   linked to another device supporting EEE.
+                */
+               return MICREL_NO_EEE;
        }
 
        return 0;
index dee35ba..0617d5c 100644 (file)
@@ -132,6 +132,8 @@ struct sja1105_info {
        int max_frame_mem;
        int num_ports;
        bool multiple_cascade_ports;
+       /* Every {port, TXQ} has its own CBS shaper */
+       bool fixed_cbs_mapping;
        enum dsa_tag_protocol tag_proto;
        const struct sja1105_dynamic_table_ops *dyn_ops;
        const struct sja1105_table_ops *static_ops;
index 331bb1c..a23d980 100644 (file)
@@ -2115,11 +2115,36 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
 }
 
 #define BYTES_PER_KBIT (1000LL / 8)
+/* Port 0 (the uC port) does not have CBS shapers */
+#define SJA1110_FIXED_CBS(port, prio) ((((port) - 1) * SJA1105_NUM_TC) + (prio))
+
+static int sja1105_find_cbs_shaper(struct sja1105_private *priv,
+                                  int port, int prio)
+{
+       int i;
+
+       if (priv->info->fixed_cbs_mapping) {
+               i = SJA1110_FIXED_CBS(port, prio);
+               if (i >= 0 && i < priv->info->num_cbs_shapers)
+                       return i;
+
+               return -1;
+       }
+
+       for (i = 0; i < priv->info->num_cbs_shapers; i++)
+               if (priv->cbs[i].port == port && priv->cbs[i].prio == prio)
+                       return i;
+
+       return -1;
+}
 
 static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
 {
        int i;
 
+       if (priv->info->fixed_cbs_mapping)
+               return -1;
+
        for (i = 0; i < priv->info->num_cbs_shapers; i++)
                if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
                        return i;
@@ -2150,14 +2175,20 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
 {
        struct sja1105_private *priv = ds->priv;
        struct sja1105_cbs_entry *cbs;
+       s64 port_transmit_rate_kbps;
        int index;
 
        if (!offload->enable)
                return sja1105_delete_cbs_shaper(priv, port, offload->queue);
 
-       index = sja1105_find_unused_cbs_shaper(priv);
-       if (index < 0)
-               return -ENOSPC;
+       /* The user may be replacing an existing shaper */
+       index = sja1105_find_cbs_shaper(priv, port, offload->queue);
+       if (index < 0) {
+               /* That isn't the case - see if we can allocate a new one */
+               index = sja1105_find_unused_cbs_shaper(priv);
+               if (index < 0)
+                       return -ENOSPC;
+       }
 
        cbs = &priv->cbs[index];
        cbs->port = port;
@@ -2167,9 +2198,17 @@ static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
         */
        cbs->credit_hi = offload->hicredit;
        cbs->credit_lo = abs(offload->locredit);
-       /* User space is in kbits/sec, hardware in bytes/sec */
-       cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT;
-       cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT);
+       /* User space is in kbits/sec, while the hardware in bytes/sec times
+        * link speed. Since the given offload->sendslope is good only for the
+        * current link speed anyway, and user space is likely to reprogram it
+        * when that changes, don't even bother to track the port's link speed,
+        * but deduce the port transmit rate from idleslope - sendslope.
+        */
+       port_transmit_rate_kbps = offload->idleslope - offload->sendslope;
+       cbs->idle_slope = div_s64(offload->idleslope * BYTES_PER_KBIT,
+                                 port_transmit_rate_kbps);
+       cbs->send_slope = div_s64(abs(offload->sendslope * BYTES_PER_KBIT),
+                                 port_transmit_rate_kbps);
        /* Convert the negative values from 64-bit 2's complement
         * to 32-bit 2's complement (for the case of 0x80000000 whose
         * negative is still negative).
index 5ce29c8..834b5c1 100644 (file)
@@ -781,6 +781,7 @@ const struct sja1105_info sja1110a_info = {
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
+       .fixed_cbs_mapping      = true,
        .ptp_ts_bits            = 32,
        .ptpegr_ts_bytes        = 8,
        .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
@@ -831,6 +832,7 @@ const struct sja1105_info sja1110b_info = {
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
+       .fixed_cbs_mapping      = true,
        .ptp_ts_bits            = 32,
        .ptpegr_ts_bytes        = 8,
        .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
@@ -881,6 +883,7 @@ const struct sja1105_info sja1110c_info = {
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
+       .fixed_cbs_mapping      = true,
        .ptp_ts_bits            = 32,
        .ptpegr_ts_bytes        = 8,
        .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
@@ -931,6 +934,7 @@ const struct sja1105_info sja1110d_info = {
        .tag_proto              = DSA_TAG_PROTO_SJA1110,
        .can_limit_mcast_flood  = true,
        .multiple_cascade_ports = true,
+       .fixed_cbs_mapping      = true,
        .ptp_ts_bits            = 32,
        .ptpegr_ts_bytes        = 8,
        .max_frame_mem          = SJA1110_MAX_FRAME_MEMORY,
index e0a4cb7..c153dc0 100644 (file)
@@ -1402,7 +1402,7 @@ static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
                return;
 
        si = enetc_psi_create(pdev);
-       if (si)
+       if (!IS_ERR(si))
                enetc_psi_destroy(pdev);
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
index ea0e38b..f281e42 100644 (file)
@@ -570,7 +570,10 @@ static int gve_rx_append_frags(struct napi_struct *napi,
                if (!skb)
                        return -1;
 
-               skb_shinfo(rx->ctx.skb_tail)->frag_list = skb;
+               if (rx->ctx.skb_tail == rx->ctx.skb_head)
+                       skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
+               else
+                       rx->ctx.skb_tail->next = skb;
                rx->ctx.skb_tail = skb;
                num_frags = 0;
        }
index a4b43bc..aaf1f42 100644 (file)
@@ -814,6 +814,7 @@ struct hnae3_tc_info {
        u8 max_tc; /* Total number of TCs */
        u8 num_tc; /* Total number of enabled TCs */
        bool mqprio_active;
+       bool dcb_ets_active;
 };
 
 #define HNAE3_MAX_DSCP                 64
index f276b5e..b850853 100644 (file)
@@ -1045,6 +1045,7 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos)
        struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
        struct hnae3_dev_specs *dev_specs = &ae_dev->dev_specs;
        struct hnae3_knic_private_info *kinfo = &h->kinfo;
+       struct net_device *dev = kinfo->netdev;
 
        *pos += scnprintf(buf + *pos, len - *pos, "dev_spec:\n");
        *pos += scnprintf(buf + *pos, len - *pos, "MAC entry num: %u\n",
@@ -1087,6 +1088,9 @@ hns3_dbg_dev_specs(struct hnae3_handle *h, char *buf, int len, int *pos)
                          dev_specs->mc_mac_size);
        *pos += scnprintf(buf + *pos, len - *pos, "MAC statistics number: %u\n",
                          dev_specs->mac_stats_num);
+       *pos += scnprintf(buf + *pos, len - *pos,
+                         "TX timeout threshold: %d seconds\n",
+                         dev->watchdog_timeo / HZ);
 }
 
 static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len)
@@ -1411,9 +1415,9 @@ int hns3_dbg_init(struct hnae3_handle *handle)
        return 0;
 
 out:
-       mutex_destroy(&handle->dbgfs_lock);
        debugfs_remove_recursive(handle->hnae3_dbgfs);
        handle->hnae3_dbgfs = NULL;
+       mutex_destroy(&handle->dbgfs_lock);
        return ret;
 }
 
@@ -1421,6 +1425,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
 {
        u32 i;
 
+       debugfs_remove_recursive(handle->hnae3_dbgfs);
+       handle->hnae3_dbgfs = NULL;
+
        for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
                if (handle->dbgfs_buf[i]) {
                        kvfree(handle->dbgfs_buf[i]);
@@ -1428,8 +1435,6 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
                }
 
        mutex_destroy(&handle->dbgfs_lock);
-       debugfs_remove_recursive(handle->hnae3_dbgfs);
-       handle->hnae3_dbgfs = NULL;
 }
 
 void hns3_dbg_register_debugfs(const char *debugfs_dir_name)
index eac2d05..b4895c7 100644 (file)
@@ -2103,8 +2103,12 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
         */
        if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num &&
            !ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) {
+               /* This smp_store_release() pairs with smp_load_acquire() in
+                * hns3_nic_reclaim_desc(). Ensure that the BD valid bit
+                * is updated.
+                */
+               smp_store_release(&ring->last_to_use, ring->next_to_use);
                hns3_tx_push_bd(ring, num);
-               WRITE_ONCE(ring->last_to_use, ring->next_to_use);
                return;
        }
 
@@ -2115,6 +2119,11 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
                return;
        }
 
+       /* This smp_store_release() pairs with smp_load_acquire() in
+        * hns3_nic_reclaim_desc(). Ensure that the BD valid bit is updated.
+        */
+       smp_store_release(&ring->last_to_use, ring->next_to_use);
+
        if (ring->tqp->mem_base)
                hns3_tx_mem_doorbell(ring);
        else
@@ -2122,7 +2131,6 @@ static void hns3_tx_doorbell(struct hns3_enet_ring *ring, int num,
                       ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
 
        ring->pending_buf = 0;
-       WRITE_ONCE(ring->last_to_use, ring->next_to_use);
 }
 
 static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
@@ -3308,8 +3316,6 @@ static void hns3_set_default_feature(struct net_device *netdev)
 
        netdev->priv_flags |= IFF_UNICAST_FLT;
 
-       netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
-
        netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
                NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
                NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
@@ -3563,9 +3569,8 @@ static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i)
 static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
                                  int *bytes, int *pkts, int budget)
 {
-       /* pair with ring->last_to_use update in hns3_tx_doorbell(),
-        * smp_store_release() is not used in hns3_tx_doorbell() because
-        * the doorbell operation already have the needed barrier operation.
+       /* This smp_load_acquire() pairs with smp_store_release() in
+        * hns3_tx_doorbell().
         */
        int ltu = smp_load_acquire(&ring->last_to_use);
        int ntc = ring->next_to_clean;
index 36858a7..682239f 100644 (file)
@@ -773,7 +773,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
                hns3_get_ksettings(h, cmd);
                break;
        case HNAE3_MEDIA_TYPE_FIBER:
-               if (module_type == HNAE3_MODULE_TYPE_CR)
+               if (module_type == HNAE3_MODULE_TYPE_UNKNOWN)
+                       cmd->base.port = PORT_OTHER;
+               else if (module_type == HNAE3_MODULE_TYPE_CR)
                        cmd->base.port = PORT_DA;
                else
                        cmd->base.port = PORT_FIBRE;
index fad5a5f..b98301e 100644 (file)
@@ -259,7 +259,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
        int ret;
 
        if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
-           hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+           h->kinfo.tc_info.mqprio_active)
                return -EINVAL;
 
        ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
@@ -275,10 +275,7 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
        }
 
        hclge_tm_schd_info_update(hdev, num_tc);
-       if (num_tc > 1)
-               hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
-       else
-               hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+       h->kinfo.tc_info.dcb_ets_active = num_tc > 1;
 
        ret = hclge_ieee_ets_to_tm_info(hdev, ets);
        if (ret)
@@ -487,7 +484,7 @@ static u8 hclge_getdcbx(struct hnae3_handle *h)
        struct hclge_vport *vport = hclge_get_vport(h);
        struct hclge_dev *hdev = vport->back;
 
-       if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+       if (h->kinfo.tc_info.mqprio_active)
                return 0;
 
        return hdev->dcbx_cap;
@@ -611,7 +608,8 @@ static int hclge_setup_tc(struct hnae3_handle *h,
        if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
                return -EBUSY;
 
-       if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
+       kinfo = &vport->nic.kinfo;
+       if (kinfo->tc_info.dcb_ets_active)
                return -EINVAL;
 
        ret = hclge_mqprio_qopt_check(hdev, mqprio_qopt);
@@ -625,7 +623,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
        if (ret)
                return ret;
 
-       kinfo = &vport->nic.kinfo;
        memcpy(&old_tc_info, &kinfo->tc_info, sizeof(old_tc_info));
        hclge_sync_mqprio_qopt(&kinfo->tc_info, mqprio_qopt);
        kinfo->tc_info.mqprio_active = tc > 0;
@@ -634,13 +631,6 @@ static int hclge_setup_tc(struct hnae3_handle *h,
        if (ret)
                goto err_out;
 
-       hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
-
-       if (tc > 1)
-               hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE;
-       else
-               hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
-
        return hclge_notify_init_up(hdev);
 
 err_out:
index f01a7a9..ff3f8f4 100644 (file)
@@ -1519,7 +1519,7 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
        struct hclge_desc desc[3];
        int pos = 0;
        int ret, i;
-       u32 *req;
+       __le32 *req;
 
        hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true);
        desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
@@ -1544,22 +1544,22 @@ static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, bool sel_x,
                         tcam_msg.loc);
 
        /* tcam_data0 ~ tcam_data1 */
-       req = (u32 *)req1->tcam_data;
+       req = (__le32 *)req1->tcam_data;
        for (i = 0; i < 2; i++)
                pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
-                                "%08x\n", *req++);
+                                "%08x\n", le32_to_cpu(*req++));
 
        /* tcam_data2 ~ tcam_data7 */
-       req = (u32 *)req2->tcam_data;
+       req = (__le32 *)req2->tcam_data;
        for (i = 0; i < 6; i++)
                pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
-                                "%08x\n", *req++);
+                                "%08x\n", le32_to_cpu(*req++));
 
        /* tcam_data8 ~ tcam_data12 */
-       req = (u32 *)req3->tcam_data;
+       req = (__le32 *)req3->tcam_data;
        for (i = 0; i < 5; i++)
                pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
-                                "%08x\n", *req++);
+                                "%08x\n", le32_to_cpu(*req++));
 
        return ret;
 }
index 0f50dba..8ca3684 100644 (file)
@@ -11026,6 +11026,7 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle,
 
 static void hclge_info_show(struct hclge_dev *hdev)
 {
+       struct hnae3_handle *handle = &hdev->vport->nic;
        struct device *dev = &hdev->pdev->dev;
 
        dev_info(dev, "PF info begin:\n");
@@ -11042,9 +11043,9 @@ static void hclge_info_show(struct hclge_dev *hdev)
        dev_info(dev, "This is %s PF\n",
                 hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
        dev_info(dev, "DCB %s\n",
-                hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
+                handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable");
        dev_info(dev, "MQPRIO %s\n",
-                hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
+                handle->kinfo.tc_info.mqprio_active ? "enable" : "disable");
        dev_info(dev, "Default tx spare buffer size: %u\n",
                 hdev->tx_spare_buf_size);
 
index ec233ec..7bc2049 100644 (file)
@@ -919,8 +919,6 @@ struct hclge_dev {
 
 #define HCLGE_FLAG_MAIN                        BIT(0)
 #define HCLGE_FLAG_DCB_CAPABLE         BIT(1)
-#define HCLGE_FLAG_DCB_ENABLE          BIT(2)
-#define HCLGE_FLAG_MQPRIO_ENABLE       BIT(3)
        u32 flag;
 
        u32 pkt_buf_size; /* Total pf buf size for tx/rx */
index 015b781..a2b7595 100644 (file)
@@ -34,11 +34,11 @@ struct igb_adapter;
 /* TX/RX descriptor defines */
 #define IGB_DEFAULT_TXD                256
 #define IGB_DEFAULT_TX_WORK    128
-#define IGB_MIN_TXD            80
+#define IGB_MIN_TXD            64
 #define IGB_MAX_TXD            4096
 
 #define IGB_DEFAULT_RXD                256
-#define IGB_MIN_RXD            80
+#define IGB_MIN_RXD            64
 #define IGB_MAX_RXD            4096
 
 #define IGB_DEFAULT_ITR                3 /* dynamic */
index 1ab787e..13ba9c7 100644 (file)
@@ -3933,8 +3933,9 @@ static void igb_probe_vfs(struct igb_adapter *adapter)
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;
 
-       /* Virtualization features not supported on i210 family. */
-       if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
+       /* Virtualization features not supported on i210 and 82580 family. */
+       if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) ||
+           (hw->mac.type == e1000_82580))
                return;
 
        /* Of the below we really only want the effect of getting
index 57d39ee..7b83678 100644 (file)
@@ -39,11 +39,11 @@ enum latency_range {
 /* Tx/Rx descriptor defines */
 #define IGBVF_DEFAULT_TXD      256
 #define IGBVF_MAX_TXD          4096
-#define IGBVF_MIN_TXD          80
+#define IGBVF_MIN_TXD          64
 
 #define IGBVF_DEFAULT_RXD      256
 #define IGBVF_MAX_RXD          4096
-#define IGBVF_MIN_RXD          80
+#define IGBVF_MIN_RXD          64
 
 #define IGBVF_MIN_ITR_USECS    10 /* 100000 irq/sec */
 #define IGBVF_MAX_ITR_USECS    10000 /* 100    irq/sec */
index 8ebe699..f48f82d 100644 (file)
@@ -379,11 +379,11 @@ static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
 /* TX/RX descriptor defines */
 #define IGC_DEFAULT_TXD                256
 #define IGC_DEFAULT_TX_WORK    128
-#define IGC_MIN_TXD            80
+#define IGC_MIN_TXD            64
 #define IGC_MAX_TXD            4096
 
 #define IGC_DEFAULT_RXD                256
-#define IGC_MIN_RXD            80
+#define IGC_MIN_RXD            64
 #define IGC_MAX_RXD            4096
 
 /* Supported Rx Buffer Sizes */
index c2f6867..23c2f2e 100644 (file)
@@ -846,6 +846,21 @@ static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block,
        return 0;
 }
 
+static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req,
+                              u16 *smq, u16 *smq_mask)
+{
+       struct nix_cn10k_aq_enq_req *aq_req;
+
+       if (!is_rvu_otx2(rvu)) {
+               aq_req = (struct nix_cn10k_aq_enq_req *)req;
+               *smq = aq_req->sq.smq;
+               *smq_mask = aq_req->sq_mask.smq;
+       } else {
+               *smq = req->sq.smq;
+               *smq_mask = req->sq_mask.smq;
+       }
+}
+
 static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
                                   struct nix_aq_enq_req *req,
                                   struct nix_aq_enq_rsp *rsp)
@@ -857,6 +872,7 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
        struct rvu_block *block;
        struct admin_queue *aq;
        struct rvu_pfvf *pfvf;
+       u16 smq, smq_mask;
        void *ctx, *mask;
        bool ena;
        u64 cfg;
@@ -928,13 +944,14 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
        if (rc)
                return rc;
 
+       nix_get_aq_req_smq(rvu, req, &smq, &smq_mask);
        /* Check if SQ pointed SMQ belongs to this PF/VF or not */
        if (req->ctype == NIX_AQ_CTYPE_SQ &&
            ((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) ||
             (req->op == NIX_AQ_INSTOP_WRITE &&
-             req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) {
+             req->sq_mask.ena && req->sq.ena && smq_mask))) {
                if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ,
-                                    pcifunc, req->sq.smq))
+                                    pcifunc, smq))
                        return NIX_AF_ERR_AQ_ENQUEUE;
        }
 
index 92d3952..feeb416 100644 (file)
@@ -17,8 +17,10 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
-       if (mlx5e_is_eswitch_flow(parse_state->flow))
+       if (mlx5e_is_eswitch_flow(parse_state->flow)) {
                attr->esw_attr->split_count = attr->esw_attr->out_count;
+               parse_state->if_count = 0;
+       }
 
        attr->flags |= MLX5_ATTR_FLAG_CT;
 
index 291193f..f63402c 100644 (file)
@@ -294,6 +294,7 @@ parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
+       parse_state->if_count = 0;
        esw_attr->out_count++;
        return 0;
 }
index 3b272bb..368a95f 100644 (file)
@@ -98,8 +98,10 @@ tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
 
        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 
-       if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+       if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
                esw_attr->split_count = esw_attr->out_count;
+               parse_state->if_count = 0;
+       }
 
        return 0;
 }
index ad09a8a..2d1d4a0 100644 (file)
@@ -66,6 +66,7 @@ tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
+       parse_state->if_count = 0;
        esw_attr->out_count++;
 
        return 0;
index c8a3eaf..a13c5e7 100644 (file)
@@ -166,6 +166,7 @@ tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
                return err;
 
        esw_attr->split_count = esw_attr->out_count;
+       parse_state->if_count = 0;
 
        return 0;
 }
index 310b992..f17575b 100644 (file)
@@ -65,8 +65,10 @@ tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
        if (err)
                return err;
 
-       if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+       if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
                attr->esw_attr->split_count = attr->esw_attr->out_count;
+               parse_state->if_count = 0;
+       }
 
        return 0;
 }
index 3180836..c24828b 100644 (file)
@@ -3936,6 +3936,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
                        }
 
                        i_split = i + 1;
+                       parse_state->if_count = 0;
                        list_add(&attr->list, &flow->attrs);
                }
 
index 6cd7d64..d4cde65 100644 (file)
@@ -1276,12 +1276,19 @@ int
 mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
                                 enum mlx5_eswitch_vport_event enabled_events)
 {
+       bool pf_needed;
        int ret;
 
+       pf_needed = mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+                   esw->mode == MLX5_ESWITCH_LEGACY;
+
        /* Enable PF vport */
-       ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events);
-       if (ret)
-               return ret;
+       if (pf_needed) {
+               ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF,
+                                                   enabled_events);
+               if (ret)
+                       return ret;
+       }
 
        /* Enable external host PF HCA */
        ret = host_pf_enable_hca(esw->dev);
@@ -1317,7 +1324,8 @@ ec_vf_err:
 ecpf_err:
        host_pf_disable_hca(esw->dev);
 pf_hca_err:
-       mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+       if (pf_needed)
+               mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
        return ret;
 }
 
@@ -1335,7 +1343,10 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw)
        }
 
        host_pf_disable_hca(esw->dev);
-       mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+
+       if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+           esw->mode == MLX5_ESWITCH_LEGACY)
+               mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
 }
 
 static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
index 752fb0d..b296ac5 100644 (file)
@@ -3216,26 +3216,47 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
        esw_acl_ingress_ofld_cleanup(esw, vport);
 }
 
-static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
 {
-       struct mlx5_vport *vport;
+       struct mlx5_vport *uplink, *manager;
+       int ret;
 
-       vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
-       if (IS_ERR(vport))
-               return PTR_ERR(vport);
+       uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+       if (IS_ERR(uplink))
+               return PTR_ERR(uplink);
+
+       ret = esw_vport_create_offloads_acl_tables(esw, uplink);
+       if (ret)
+               return ret;
+
+       manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (IS_ERR(manager)) {
+               ret = PTR_ERR(manager);
+               goto err_manager;
+       }
 
-       return esw_vport_create_offloads_acl_tables(esw, vport);
+       ret = esw_vport_create_offloads_acl_tables(esw, manager);
+       if (ret)
+               goto err_manager;
+
+       return 0;
+
+err_manager:
+       esw_vport_destroy_offloads_acl_tables(esw, uplink);
+       return ret;
 }
 
-static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
 {
        struct mlx5_vport *vport;
 
-       vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
-       if (IS_ERR(vport))
-               return;
+       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (!IS_ERR(vport))
+               esw_vport_destroy_offloads_acl_tables(esw, vport);
 
-       esw_vport_destroy_offloads_acl_tables(esw, vport);
+       vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+       if (!IS_ERR(vport))
+               esw_vport_destroy_offloads_acl_tables(esw, vport);
 }
 
 int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
@@ -3280,7 +3301,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
        }
        esw->fdb_table.offloads.indir = indir;
 
-       err = esw_create_uplink_offloads_acl_tables(esw);
+       err = esw_create_offloads_acl_tables(esw);
        if (err)
                goto create_acl_err;
 
@@ -3321,7 +3342,7 @@ create_fdb_err:
 create_restore_err:
        esw_destroy_offloads_table(esw);
 create_offloads_err:
-       esw_destroy_uplink_offloads_acl_tables(esw);
+       esw_destroy_offloads_acl_tables(esw);
 create_acl_err:
        mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
 create_indir_err:
@@ -3337,7 +3358,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
        esw_destroy_offloads_fdb_tables(esw);
        esw_destroy_restore_table(esw);
        esw_destroy_offloads_table(esw);
-       esw_destroy_uplink_offloads_acl_tables(esw);
+       esw_destroy_offloads_acl_tables(esw);
        mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
        mutex_destroy(&esw->fdb_table.offloads.vports.lock);
 }
index 2375cef..f77a2d3 100644 (file)
@@ -359,26 +359,36 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
 /* Handle a received packet.  Second half: Touches packet payload. */
 void __efx_rx_packet(struct efx_channel *channel)
 {
+       struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
        struct efx_nic *efx = channel->efx;
        struct efx_rx_buffer *rx_buf =
-               efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
+               efx_rx_buffer(rx_queue, channel->rx_pkt_index);
        u8 *eh = efx_rx_buf_va(rx_buf);
 
        /* Read length from the prefix if necessary.  This already
         * excludes the length of the prefix itself.
         */
-       if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN)
+       if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) {
                rx_buf->len = le16_to_cpup((__le16 *)
                                           (eh + efx->rx_packet_len_offset));
+               /* A known issue may prevent this being filled in;
+                * if that happens, just drop the packet.
+                * Must do that in the driver since passing a zero-length
+                * packet up to the stack may cause a crash.
+                */
+               if (unlikely(!rx_buf->len)) {
+                       efx_free_rx_buffers(rx_queue, rx_buf,
+                                           channel->rx_pkt_n_frags);
+                       channel->n_rx_frm_trunc++;
+                       goto out;
+               }
+       }
 
        /* If we're in loopback test, then pass the packet directly to the
         * loopback layer, and free the rx_buf here
         */
        if (unlikely(efx->loopback_selftest)) {
-               struct efx_rx_queue *rx_queue;
-
                efx_loopback_rx_packet(efx, eh, rx_buf->len);
-               rx_queue = efx_channel_get_rx_queue(channel);
                efx_free_rx_buffers(rx_queue, rx_buf,
                                    channel->rx_pkt_n_frags);
                goto out;
index 35f4b14..0f28795 100644 (file)
@@ -419,9 +419,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
                return ERR_PTR(phy_mode);
 
        plat->phy_interface = phy_mode;
-       plat->mac_interface = stmmac_of_get_mac_mode(np);
-       if (plat->mac_interface < 0)
-               plat->mac_interface = plat->phy_interface;
+       rc = stmmac_of_get_mac_mode(np);
+       plat->mac_interface = rc < 0 ? plat->phy_interface : rc;
 
        /* Some wrapper drivers still rely on phy_node. Let's save it while
         * they are not converted to phylink. */
index c3f3066..b7e1514 100644 (file)
@@ -1330,8 +1330,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len)
        struct crypto_aead *tfm;
        int ret;
 
-       /* Pick a sync gcm(aes) cipher to ensure order is preserved. */
-       tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+       tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
 
        if (IS_ERR(tfm))
                return tfm;
index b6d7981..927d3d5 100644 (file)
@@ -1800,9 +1800,6 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = {
        /* Transmit waveform amplitude can be improved (1000BASE-T, 100BASE-TX, 10BASE-Te) */
        {0x1c, 0x04, 0x00d0},
 
-       /* Energy Efficient Ethernet (EEE) feature select must be manually disabled */
-       {0x07, 0x3c, 0x0000},
-
        /* Register settings are required to meet data sheet supply current specifications */
        {0x1c, 0x13, 0x6eff},
        {0x1c, 0x14, 0xe6ff},
@@ -1847,6 +1844,12 @@ static int ksz9477_config_init(struct phy_device *phydev)
                        return err;
        }
 
+       /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes
+        * in this switch shall be regarded as broken.
+        */
+       if (phydev->dev_flags & MICREL_NO_EEE)
+               phydev->eee_broken_modes = -1;
+
        err = genphy_restart_aneg(phydev);
        if (err)
                return err;
index d43e62e..9c6f4f8 100644 (file)
@@ -344,6 +344,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
        struct veth_rq *rq = NULL;
+       int ret = NETDEV_TX_OK;
        struct net_device *rcv;
        int length = skb->len;
        bool use_napi = false;
@@ -378,11 +379,12 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
        } else {
 drop:
                atomic64_inc(&priv->dropped);
+               ret = NET_XMIT_DROP;
        }
 
        rcu_read_unlock();
 
-       return NETDEV_TX_OK;
+       return ret;
 }
 
 static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
index dca25a0..3ae4b41 100644 (file)
@@ -336,6 +336,7 @@ MODULE_DEVICE_TABLE(of, of_nxp_nci_i2c_match);
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id acpi_id[] = {
        { "NXP1001" },
+       { "NXP1002" },
        { "NXP7471" },
        { }
 };
index 4940b63..d687e8c 100644 (file)
@@ -941,13 +941,10 @@ static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
                ndev->debugfs_dir =
                        debugfs_create_dir(pci_name(ndev->ntb.pdev),
                                           debugfs_dir);
-               if (IS_ERR(ndev->debugfs_dir))
-                       ndev->debugfs_info = NULL;
-               else
-                       ndev->debugfs_info =
-                               debugfs_create_file("info", S_IRUSR,
-                                                   ndev->debugfs_dir, ndev,
-                                                   &amd_ntb_debugfs_info);
+               ndev->debugfs_info =
+                       debugfs_create_file("info", S_IRUSR,
+                                           ndev->debugfs_dir, ndev,
+                                           &amd_ntb_debugfs_info);
        }
 }
 
index 2abd223..f9e7847 100644 (file)
@@ -909,7 +909,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
        return 0;
 }
 
-static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+static void ntb_qp_link_context_reset(struct ntb_transport_qp *qp)
 {
        qp->link_is_up = false;
        qp->active = false;
@@ -932,6 +932,13 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
        qp->tx_async = 0;
 }
 
+static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
+{
+       ntb_qp_link_context_reset(qp);
+       if (qp->remote_rx_info)
+               qp->remote_rx_info->entry = qp->rx_max_entry - 1;
+}
+
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
 {
        struct ntb_transport_ctx *nt = qp->transport;
@@ -1174,7 +1181,7 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt,
        qp->ndev = nt->ndev;
        qp->client_ready = false;
        qp->event_handler = NULL;
-       ntb_qp_link_down_reset(qp);
+       ntb_qp_link_context_reset(qp);
 
        if (mw_num < qp_count % mw_count)
                num_qps_mw = qp_count / mw_count + 1;
@@ -1894,7 +1901,7 @@ err:
 static int ntb_process_tx(struct ntb_transport_qp *qp,
                          struct ntb_queue_entry *entry)
 {
-       if (qp->tx_index == qp->remote_rx_info->entry) {
+       if (!ntb_transport_tx_free_entry(qp)) {
                qp->tx_ring_full++;
                return -EAGAIN;
        }
@@ -2276,9 +2283,13 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
        struct ntb_queue_entry *entry;
        int rc;
 
-       if (!qp || !qp->link_is_up || !len)
+       if (!qp || !len)
                return -EINVAL;
 
+       /* If the qp link is down already, just ignore. */
+       if (!qp->link_is_up)
+               return 0;
+
        entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
        if (!entry) {
                qp->tx_err_no_buf++;
@@ -2418,7 +2429,7 @@ unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
        unsigned int head = qp->tx_index;
        unsigned int tail = qp->remote_rx_info->entry;
 
-       return tail > head ? tail - head : qp->tx_max_entry + tail - head;
+       return tail >= head ? tail - head : qp->tx_max_entry + tail - head;
 }
 EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry);
 
index 65e1e5c..553f1f4 100644 (file)
@@ -1355,7 +1355,7 @@ static void perf_setup_dbgfs(struct perf_ctx *perf)
        struct pci_dev *pdev = perf->ntb->pdev;
 
        perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir);
-       if (!perf->dbgfs_dir) {
+       if (IS_ERR(perf->dbgfs_dir)) {
                dev_warn(&perf->ntb->dev, "DebugFS unsupported\n");
                return;
        }
index eeeb4b1..641cb7e 100644 (file)
@@ -370,16 +370,9 @@ static ssize_t tool_fn_write(struct tool_ctx *tc,
        if (*offp)
                return 0;
 
-       buf = kmalloc(size + 1, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-
-       if (copy_from_user(buf, ubuf, size)) {
-               kfree(buf);
-               return -EFAULT;
-       }
-
-       buf[size] = 0;
+       buf = memdup_user_nul(ubuf, size);
+       if (IS_ERR(buf))
+               return PTR_ERR(buf);
 
        n = sscanf(buf, "%c %lli", &cmd, &bits);
 
@@ -1495,8 +1488,6 @@ static void tool_setup_dbgfs(struct tool_ctx *tc)
 
        tc->dbgfs_dir = debugfs_create_dir(dev_name(&tc->ntb->dev),
                                           tool_dbgfs_topdir);
-       if (!tc->dbgfs_dir)
-               return;
 
        debugfs_create_file("port", 0600, tc->dbgfs_dir,
                            tc, &tool_port_fops);
index 49bd09c..e9ae66c 100644 (file)
@@ -196,7 +196,7 @@ config PCI_HYPERV
 
 config PCI_DYNAMIC_OF_NODES
        bool "Create Device tree nodes for PCI devices"
-       depends on OF
+       depends on OF_IRQ
        select OF_DYNAMIC
        help
          This option enables support for generating device tree nodes for some
index ab2a4a3..7955345 100644 (file)
@@ -997,6 +997,7 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
                res = window->res;
                if (!res->flags && !res->start && !res->end) {
                        release_resource(res);
+                       resource_list_destroy_entry(window);
                        continue;
                }
 
index 5de09d2..eeec1d6 100644 (file)
@@ -3726,7 +3726,7 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
  */
 static void quirk_nvidia_no_bus_reset(struct pci_dev *dev)
 {
-       if ((dev->device & 0xffc0) == 0x2340 || dev->device == 0x1eb8)
+       if ((dev->device & 0xffc0) == 0x2340)
                quirk_no_bus_reset(dev);
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
index e5a2ac4..8fcaa26 100644 (file)
@@ -749,6 +749,8 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 
        /* Enable all counters */
        armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
+
+       kvm_vcpu_pmu_resync_el0();
 }
 
 static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
index 0a8f597..365d964 100644 (file)
@@ -25,7 +25,7 @@
 #include "../cxl/pmu.h"
 
 #define CXL_PMU_CAP_REG                        0x0
-#define   CXL_PMU_CAP_NUM_COUNTERS_MSK                 GENMASK_ULL(4, 0)
+#define   CXL_PMU_CAP_NUM_COUNTERS_MSK                 GENMASK_ULL(5, 0)
 #define   CXL_PMU_CAP_COUNTER_WIDTH_MSK                        GENMASK_ULL(15, 8)
 #define   CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK          GENMASK_ULL(24, 20)
 #define   CXL_PMU_CAP_FILTERS_SUP_MSK                  GENMASK_ULL(39, 32)
index 4aa466c..0b69fb7 100644 (file)
@@ -1309,8 +1309,8 @@ static int psy_register_thermal(struct power_supply *psy)
                struct thermal_zone_params tzp = {
                        .no_hwmon = IS_ENABLED(CONFIG_POWER_SUPPLY_HWMON)
                };
-               psy->tzd = thermal_zone_device_register(psy->desc->name,
-                               0, 0, psy, &psy_tzd_ops, &tzp, 0, 0);
+               psy->tzd = thermal_tripless_zone_device_register(psy->desc->name,
+                               psy, &psy_tzd_ops, &tzp);
                if (IS_ERR(psy->tzd))
                        return PTR_ERR(psy->tzd);
                ret = thermal_zone_device_enable(psy->tzd);
index 5c2e6d5..40a2cc6 100644 (file)
@@ -658,8 +658,6 @@ static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
                            RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
        [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
                            RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
-       [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
-                               RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
        [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
                            RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
        [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
@@ -1458,7 +1456,7 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
                        }
                }
 
-               if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
+               if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64))
                        rd->rpl[i].name = NULL;
        }
 }
index 6210bab..8ebcddf 100644 (file)
@@ -505,7 +505,7 @@ config PWM_ROCKCHIP
 
 config PWM_RZ_MTU3
        tristate "Renesas RZ/G2L MTU3a PWM Timer support"
-       depends on RZ_MTU3 || COMPILE_TEST
+       depends on RZ_MTU3
        depends on HAS_IOMEM
        help
          This driver exposes the MTU3a PWM Timer controller found in Renesas
index 3daccea..dc66e34 100644 (file)
@@ -8,8 +8,8 @@
 
 #include <linux/acpi.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/pwm.h>
-#include <linux/radix-tree.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/err.h>
@@ -127,28 +127,28 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
 }
 
 struct pwm_device *
-of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
+of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args)
 {
        struct pwm_device *pwm;
 
-       if (pc->of_pwm_n_cells < 2)
+       if (chip->of_pwm_n_cells < 2)
                return ERR_PTR(-EINVAL);
 
        /* flags in the third cell are optional */
        if (args->args_count < 2)
                return ERR_PTR(-EINVAL);
 
-       if (args->args[0] >= pc->npwm)
+       if (args->args[0] >= chip->npwm)
                return ERR_PTR(-EINVAL);
 
-       pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+       pwm = pwm_request_from_chip(chip, args->args[0], NULL);
        if (IS_ERR(pwm))
                return pwm;
 
        pwm->args.period = args->args[1];
        pwm->args.polarity = PWM_POLARITY_NORMAL;
 
-       if (pc->of_pwm_n_cells >= 3) {
+       if (chip->of_pwm_n_cells >= 3) {
                if (args->args_count > 2 && args->args[2] & PWM_POLARITY_INVERTED)
                        pwm->args.polarity = PWM_POLARITY_INVERSED;
        }
@@ -158,18 +158,18 @@ of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
 EXPORT_SYMBOL_GPL(of_pwm_xlate_with_flags);
 
 struct pwm_device *
-of_pwm_single_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
+of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
 {
        struct pwm_device *pwm;
 
-       if (pc->of_pwm_n_cells < 1)
+       if (chip->of_pwm_n_cells < 1)
                return ERR_PTR(-EINVAL);
 
        /* validate that one cell is specified, optionally with flags */
        if (args->args_count != 1 && args->args_count != 2)
                return ERR_PTR(-EINVAL);
 
-       pwm = pwm_request_from_chip(pc, 0, NULL);
+       pwm = pwm_request_from_chip(chip, 0, NULL);
        if (IS_ERR(pwm))
                return pwm;
 
@@ -312,22 +312,19 @@ EXPORT_SYMBOL_GPL(pwmchip_add);
  * pwmchip_remove() - remove a PWM chip
  * @chip: the PWM chip to remove
  *
- * Removes a PWM chip. This function may return busy if the PWM chip provides
- * a PWM device that is still requested.
- *
- * Returns: 0 on success or a negative error code on failure.
+ * Removes a PWM chip.
  */
 void pwmchip_remove(struct pwm_chip *chip)
 {
        pwmchip_sysfs_unexport(chip);
 
+       if (IS_ENABLED(CONFIG_OF))
+               of_pwmchip_remove(chip);
+
        mutex_lock(&pwm_lock);
 
        list_del_init(&chip->list);
 
-       if (IS_ENABLED(CONFIG_OF))
-               of_pwmchip_remove(chip);
-
        free_pwms(chip);
 
        mutex_unlock(&pwm_lock);
@@ -692,7 +689,7 @@ static struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
        struct pwm_device *pwm = NULL;
        struct of_phandle_args args;
        struct device_link *dl;
-       struct pwm_chip *pc;
+       struct pwm_chip *chip;
        int index = 0;
        int err;
 
@@ -709,16 +706,16 @@ static struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
                return ERR_PTR(err);
        }
 
-       pc = fwnode_to_pwmchip(of_fwnode_handle(args.np));
-       if (IS_ERR(pc)) {
-               if (PTR_ERR(pc) != -EPROBE_DEFER)
+       chip = fwnode_to_pwmchip(of_fwnode_handle(args.np));
+       if (IS_ERR(chip)) {
+               if (PTR_ERR(chip) != -EPROBE_DEFER)
                        pr_err("%s(): PWM chip not found\n", __func__);
 
-               pwm = ERR_CAST(pc);
+               pwm = ERR_CAST(chip);
                goto put;
        }
 
-       pwm = pc->of_xlate(pc, &args);
+       pwm = chip->of_xlate(chip, &args);
        if (IS_ERR(pwm))
                goto put;
 
index a38a62e..8e7d67f 100644 (file)
@@ -12,6 +12,7 @@
  * - When APPLE_PWM_CTRL is set to 0, the output is constant low
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
index 96a709a..e271d92 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/delay.h>
 #include <linux/mfd/atmel-hlcdc.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
@@ -38,11 +39,11 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip)
        return container_of(chip, struct atmel_hlcdc_pwm, chip);
 }
 
-static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
+static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                                 const struct pwm_state *state)
 {
-       struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
-       struct atmel_hlcdc *hlcdc = chip->hlcdc;
+       struct atmel_hlcdc_pwm *atmel = to_atmel_hlcdc_pwm(chip);
+       struct atmel_hlcdc *hlcdc = atmel->hlcdc;
        unsigned int status;
        int ret;
 
@@ -54,7 +55,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
                u32 pwmcfg;
                int pres;
 
-               if (!chip->errata || !chip->errata->slow_clk_erratum) {
+               if (!atmel->errata || !atmel->errata->slow_clk_erratum) {
                        clk_freq = clk_get_rate(new_clk);
                        if (!clk_freq)
                                return -EINVAL;
@@ -64,7 +65,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
                }
 
                /* Errata: cannot use slow clk on some IP revisions */
-               if ((chip->errata && chip->errata->slow_clk_erratum) ||
+               if ((atmel->errata && atmel->errata->slow_clk_erratum) ||
                    clk_period_ns > state->period) {
                        new_clk = hlcdc->sys_clk;
                        clk_freq = clk_get_rate(new_clk);
@@ -77,8 +78,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
 
                for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) {
                /* Errata: cannot divide by 1 on some IP revisions */
-                       if (!pres && chip->errata &&
-                           chip->errata->div1_clk_erratum)
+                       if (!pres && atmel->errata &&
+                           atmel->errata->div1_clk_erratum)
                                continue;
 
                        if ((clk_period_ns << pres) >= state->period)
@@ -90,7 +91,7 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
 
                pwmcfg = ATMEL_HLCDC_PWMPS(pres);
 
-               if (new_clk != chip->cur_clk) {
+               if (new_clk != atmel->cur_clk) {
                        u32 gencfg = 0;
                        int ret;
 
@@ -98,8 +99,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
                        if (ret)
                                return ret;
 
-                       clk_disable_unprepare(chip->cur_clk);
-                       chip->cur_clk = new_clk;
+                       clk_disable_unprepare(atmel->cur_clk);
+                       atmel->cur_clk = new_clk;
 
                        if (new_clk == hlcdc->sys_clk)
                                gencfg = ATMEL_HLCDC_CLKPWMSEL;
@@ -160,8 +161,8 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
                if (ret)
                        return ret;
 
-               clk_disable_unprepare(chip->cur_clk);
-               chip->cur_clk = NULL;
+               clk_disable_unprepare(atmel->cur_clk);
+               atmel->cur_clk = NULL;
        }
 
        return 0;
@@ -183,31 +184,32 @@ static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_sama5d3_errata = {
 #ifdef CONFIG_PM_SLEEP
 static int atmel_hlcdc_pwm_suspend(struct device *dev)
 {
-       struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev);
+       struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev);
 
        /* Keep the periph clock enabled if the PWM is still running. */
-       if (pwm_is_enabled(&chip->chip.pwms[0]))
-               clk_disable_unprepare(chip->hlcdc->periph_clk);
+       if (pwm_is_enabled(&atmel->chip.pwms[0]))
+               clk_disable_unprepare(atmel->hlcdc->periph_clk);
 
        return 0;
 }
 
 static int atmel_hlcdc_pwm_resume(struct device *dev)
 {
-       struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev);
+       struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev);
        struct pwm_state state;
        int ret;
 
-       pwm_get_state(&chip->chip.pwms[0], &state);
+       pwm_get_state(&atmel->chip.pwms[0], &state);
 
        /* Re-enable the periph clock it was stopped during suspend. */
        if (!state.enabled) {
-               ret = clk_prepare_enable(chip->hlcdc->periph_clk);
+               ret = clk_prepare_enable(atmel->hlcdc->periph_clk);
                if (ret)
                        return ret;
        }
 
-       return atmel_hlcdc_pwm_apply(&chip->chip, &chip->chip.pwms[0], &state);
+       return atmel_hlcdc_pwm_apply(&atmel->chip, &atmel->chip.pwms[0],
+                                    &state);
 }
 #endif
 
@@ -244,14 +246,14 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
 {
        const struct of_device_id *match;
        struct device *dev = &pdev->dev;
-       struct atmel_hlcdc_pwm *chip;
+       struct atmel_hlcdc_pwm *atmel;
        struct atmel_hlcdc *hlcdc;
        int ret;
 
        hlcdc = dev_get_drvdata(dev->parent);
 
-       chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
-       if (!chip)
+       atmel = devm_kzalloc(dev, sizeof(*atmel), GFP_KERNEL);
+       if (!atmel)
                return -ENOMEM;
 
        ret = clk_prepare_enable(hlcdc->periph_clk);
@@ -260,31 +262,31 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
 
        match = of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node);
        if (match)
-               chip->errata = match->data;
+               atmel->errata = match->data;
 
-       chip->hlcdc = hlcdc;
-       chip->chip.ops = &atmel_hlcdc_pwm_ops;
-       chip->chip.dev = dev;
-       chip->chip.npwm = 1;
+       atmel->hlcdc = hlcdc;
+       atmel->chip.ops = &atmel_hlcdc_pwm_ops;
+       atmel->chip.dev = dev;
+       atmel->chip.npwm = 1;
 
-       ret = pwmchip_add(&chip->chip);
+       ret = pwmchip_add(&atmel->chip);
        if (ret) {
                clk_disable_unprepare(hlcdc->periph_clk);
                return ret;
        }
 
-       platform_set_drvdata(pdev, chip);
+       platform_set_drvdata(pdev, atmel);
 
        return 0;
 }
 
 static void atmel_hlcdc_pwm_remove(struct platform_device *pdev)
 {
-       struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev);
+       struct atmel_hlcdc_pwm *atmel = platform_get_drvdata(pdev);
 
-       pwmchip_remove(&chip->chip);
+       pwmchip_remove(&atmel->chip);
 
-       clk_disable_unprepare(chip->hlcdc->periph_clk);
+       clk_disable_unprepare(atmel->hlcdc->periph_clk);
 }
 
 static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = {
index 4a116dc..c00dd37 100644 (file)
@@ -19,8 +19,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
 #include <soc/at91/atmel_tcb.h>
@@ -34,7 +33,6 @@
                                 ATMEL_TC_BEEVT | ATMEL_TC_BSWTRG)
 
 struct atmel_tcb_pwm_device {
-       enum pwm_polarity polarity;     /* PWM polarity */
        unsigned div;                   /* PWM clock divider */
        unsigned duty;                  /* PWM duty expressed in clk cycles */
        unsigned period;                /* PWM period expressed in clk cycles */
@@ -57,7 +55,7 @@ struct atmel_tcb_pwm_chip {
        struct clk *clk;
        struct clk *gclk;
        struct clk *slow_clk;
-       struct atmel_tcb_pwm_device *pwms[NPWM];
+       struct atmel_tcb_pwm_device pwms[NPWM];
        struct atmel_tcb_channel bkup;
 };
 
@@ -68,37 +66,18 @@ static inline struct atmel_tcb_pwm_chip *to_tcb_chip(struct pwm_chip *chip)
        return container_of(chip, struct atmel_tcb_pwm_chip, chip);
 }
 
-static int atmel_tcb_pwm_set_polarity(struct pwm_chip *chip,
-                                     struct pwm_device *pwm,
-                                     enum pwm_polarity polarity)
-{
-       struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
-
-       tcbpwm->polarity = polarity;
-
-       return 0;
-}
-
 static int atmel_tcb_pwm_request(struct pwm_chip *chip,
                                 struct pwm_device *pwm)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm;
+       struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
        unsigned cmr;
        int ret;
 
-       tcbpwm = devm_kzalloc(chip->dev, sizeof(*tcbpwm), GFP_KERNEL);
-       if (!tcbpwm)
-               return -ENOMEM;
-
        ret = clk_prepare_enable(tcbpwmc->clk);
-       if (ret) {
-               devm_kfree(chip->dev, tcbpwm);
+       if (ret)
                return ret;
-       }
 
-       tcbpwm->polarity = PWM_POLARITY_NORMAL;
        tcbpwm->duty = 0;
        tcbpwm->period = 0;
        tcbpwm->div = 0;
@@ -131,27 +110,22 @@ static int atmel_tcb_pwm_request(struct pwm_chip *chip,
        regmap_write(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), cmr);
        spin_unlock(&tcbpwmc->lock);
 
-       tcbpwmc->pwms[pwm->hwpwm] = tcbpwm;
-
        return 0;
 }
 
 static void atmel_tcb_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
 
        clk_disable_unprepare(tcbpwmc->clk);
-       tcbpwmc->pwms[pwm->hwpwm] = NULL;
-       devm_kfree(chip->dev, tcbpwm);
 }
 
-static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm,
+                                 enum pwm_polarity polarity)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+       struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
        unsigned cmr;
-       enum pwm_polarity polarity = tcbpwm->polarity;
 
        /*
         * If duty is 0 the timer will be stopped and we have to
@@ -203,12 +177,12 @@ static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
        spin_unlock(&tcbpwmc->lock);
 }
 
-static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
+                               enum pwm_polarity polarity)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+       struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
        u32 cmr;
-       enum pwm_polarity polarity = tcbpwm->polarity;
 
        /*
         * If duty is 0 the timer will be stopped and we have to
@@ -291,7 +265,7 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
                                int duty_ns, int period_ns)
 {
        struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
-       struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+       struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
        struct atmel_tcb_pwm_device *atcbpwm = NULL;
        int i = 0;
        int slowclk = 0;
@@ -338,9 +312,9 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        period = div_u64(period_ns, min);
 
        if (pwm->hwpwm == 0)
-               atcbpwm = tcbpwmc->pwms[1];
+               atcbpwm = &tcbpwmc->pwms[1];
        else
-               atcbpwm = tcbpwmc->pwms[0];
+               atcbpwm = &tcbpwmc->pwms[0];
 
        /*
         * PWM devices provided by the TCB driver are grouped by 2.
@@ -371,11 +345,8 @@ static int atmel_tcb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        int duty_cycle, period;
        int ret;
 
-       /* This function only sets a flag in driver data */
-       atmel_tcb_pwm_set_polarity(chip, pwm, state->polarity);
-
        if (!state->enabled) {
-               atmel_tcb_pwm_disable(chip, pwm);
+               atmel_tcb_pwm_disable(chip, pwm, state->polarity);
                return 0;
        }
 
@@ -386,7 +357,7 @@ static int atmel_tcb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        if (ret)
                return ret;
 
-       return atmel_tcb_pwm_enable(chip, pwm);
+       return atmel_tcb_pwm_enable(chip, pwm, state->polarity);
 }
 
 static const struct pwm_ops atmel_tcb_pwm_ops = {
@@ -422,13 +393,14 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
        struct atmel_tcb_pwm_chip *tcbpwm;
        const struct atmel_tcb_config *config;
        struct device_node *np = pdev->dev.of_node;
-       struct regmap *regmap;
-       struct clk *clk, *gclk = NULL;
-       struct clk *slow_clk;
        char clk_name[] = "t0_clk";
        int err;
        int channel;
 
+       tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
+       if (tcbpwm == NULL)
+               return -ENOMEM;
+
        err = of_property_read_u32(np, "reg", &channel);
        if (err < 0) {
                dev_err(&pdev->dev,
@@ -437,49 +409,43 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
                return err;
        }
 
-       regmap = syscon_node_to_regmap(np->parent);
-       if (IS_ERR(regmap))
-               return PTR_ERR(regmap);
+       tcbpwm->regmap = syscon_node_to_regmap(np->parent);
+       if (IS_ERR(tcbpwm->regmap))
+               return PTR_ERR(tcbpwm->regmap);
 
-       slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
-       if (IS_ERR(slow_clk))
-               return PTR_ERR(slow_clk);
+       tcbpwm->slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
+       if (IS_ERR(tcbpwm->slow_clk))
+               return PTR_ERR(tcbpwm->slow_clk);
 
        clk_name[1] += channel;
-       clk = of_clk_get_by_name(np->parent, clk_name);
-       if (IS_ERR(clk))
-               clk = of_clk_get_by_name(np->parent, "t0_clk");
-       if (IS_ERR(clk))
-               return PTR_ERR(clk);
+       tcbpwm->clk = of_clk_get_by_name(np->parent, clk_name);
+       if (IS_ERR(tcbpwm->clk))
+               tcbpwm->clk = of_clk_get_by_name(np->parent, "t0_clk");
+       if (IS_ERR(tcbpwm->clk)) {
+               err = PTR_ERR(tcbpwm->clk);
+               goto err_slow_clk;
+       }
 
        match = of_match_node(atmel_tcb_of_match, np->parent);
        config = match->data;
 
        if (config->has_gclk) {
-               gclk = of_clk_get_by_name(np->parent, "gclk");
-               if (IS_ERR(gclk))
-                       return PTR_ERR(gclk);
-       }
-
-       tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
-       if (tcbpwm == NULL) {
-               err = -ENOMEM;
-               goto err_slow_clk;
+               tcbpwm->gclk = of_clk_get_by_name(np->parent, "gclk");
+               if (IS_ERR(tcbpwm->gclk)) {
+                       err = PTR_ERR(tcbpwm->gclk);
+                       goto err_clk;
+               }
        }
 
        tcbpwm->chip.dev = &pdev->dev;
        tcbpwm->chip.ops = &atmel_tcb_pwm_ops;
        tcbpwm->chip.npwm = NPWM;
        tcbpwm->channel = channel;
-       tcbpwm->regmap = regmap;
-       tcbpwm->clk = clk;
-       tcbpwm->gclk = gclk;
-       tcbpwm->slow_clk = slow_clk;
        tcbpwm->width = config->counter_width;
 
-       err = clk_prepare_enable(slow_clk);
+       err = clk_prepare_enable(tcbpwm->slow_clk);
        if (err)
-               goto err_slow_clk;
+               goto err_gclk;
 
        spin_lock_init(&tcbpwm->lock);
 
@@ -494,8 +460,14 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
 err_disable_clk:
        clk_disable_unprepare(tcbpwm->slow_clk);
 
+err_gclk:
+       clk_put(tcbpwm->gclk);
+
+err_clk:
+       clk_put(tcbpwm->clk);
+
 err_slow_clk:
-       clk_put(slow_clk);
+       clk_put(tcbpwm->slow_clk);
 
        return err;
 }
@@ -507,8 +479,9 @@ static void atmel_tcb_pwm_remove(struct platform_device *pdev)
        pwmchip_remove(&tcbpwm->chip);
 
        clk_disable_unprepare(tcbpwm->slow_clk);
-       clk_put(tcbpwm->slow_clk);
+       clk_put(tcbpwm->gclk);
        clk_put(tcbpwm->clk);
+       clk_put(tcbpwm->slow_clk);
 }
 
 static const struct of_device_id atmel_tcb_pwm_dt_ids[] = {
index 5f7d286..1f73325 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
@@ -36,7 +35,7 @@
 #define PWM_SR                 0x0C
 #define PWM_ISR                        0x1C
 /* Bit field in SR */
-#define PWM_SR_ALL_CH_ON       0x0F
+#define PWM_SR_ALL_CH_MASK     0x0F
 
 /* The following register is PWM channel related registers */
 #define PWM_CH_REG_OFFSET      0x200
@@ -464,6 +463,42 @@ static const struct of_device_id atmel_pwm_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids);
 
+static int atmel_pwm_enable_clk_if_on(struct atmel_pwm_chip *atmel_pwm, bool on)
+{
+       unsigned int i, cnt = 0;
+       unsigned long sr;
+       int ret = 0;
+
+       sr = atmel_pwm_readl(atmel_pwm, PWM_SR) & PWM_SR_ALL_CH_MASK;
+       if (!sr)
+               return 0;
+
+       cnt = bitmap_weight(&sr, atmel_pwm->chip.npwm);
+
+       if (!on)
+               goto disable_clk;
+
+       for (i = 0; i < cnt; i++) {
+               ret = clk_enable(atmel_pwm->clk);
+               if (ret) {
+                       dev_err(atmel_pwm->chip.dev,
+                               "failed to enable clock for pwm %pe\n",
+                               ERR_PTR(ret));
+
+                       cnt = i;
+                       goto disable_clk;
+               }
+       }
+
+       return 0;
+
+disable_clk:
+       while (cnt--)
+               clk_disable(atmel_pwm->clk);
+
+       return ret;
+}
+
 static int atmel_pwm_probe(struct platform_device *pdev)
 {
        struct atmel_pwm_chip *atmel_pwm;
@@ -482,51 +517,39 @@ static int atmel_pwm_probe(struct platform_device *pdev)
        if (IS_ERR(atmel_pwm->base))
                return PTR_ERR(atmel_pwm->base);
 
-       atmel_pwm->clk = devm_clk_get(&pdev->dev, NULL);
+       atmel_pwm->clk = devm_clk_get_prepared(&pdev->dev, NULL);
        if (IS_ERR(atmel_pwm->clk))
-               return PTR_ERR(atmel_pwm->clk);
-
-       ret = clk_prepare(atmel_pwm->clk);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to prepare PWM clock\n");
-               return ret;
-       }
+               return dev_err_probe(&pdev->dev, PTR_ERR(atmel_pwm->clk),
+                                    "failed to get prepared PWM clock\n");
 
        atmel_pwm->chip.dev = &pdev->dev;
        atmel_pwm->chip.ops = &atmel_pwm_ops;
        atmel_pwm->chip.npwm = 4;
 
-       ret = pwmchip_add(&atmel_pwm->chip);
+       ret = atmel_pwm_enable_clk_if_on(atmel_pwm, true);
+       if (ret < 0)
+               return ret;
+
+       ret = devm_pwmchip_add(&pdev->dev, &atmel_pwm->chip);
        if (ret < 0) {
-               dev_err(&pdev->dev, "failed to add PWM chip %d\n", ret);
-               goto unprepare_clk;
+               dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
+               goto disable_clk;
        }
 
-       platform_set_drvdata(pdev, atmel_pwm);
+       return 0;
 
-       return ret;
+disable_clk:
+       atmel_pwm_enable_clk_if_on(atmel_pwm, false);
 
-unprepare_clk:
-       clk_unprepare(atmel_pwm->clk);
        return ret;
 }
 
-static void atmel_pwm_remove(struct platform_device *pdev)
-{
-       struct atmel_pwm_chip *atmel_pwm = platform_get_drvdata(pdev);
-
-       pwmchip_remove(&atmel_pwm->chip);
-
-       clk_unprepare(atmel_pwm->clk);
-}
-
 static struct platform_driver atmel_pwm_driver = {
        .driver = {
                .name = "atmel-pwm",
                .of_match_table = of_match_ptr(atmel_pwm_dt_ids),
        },
        .probe = atmel_pwm_probe,
-       .remove_new = atmel_pwm_remove,
 };
 module_platform_driver(atmel_pwm_driver);
 
index 4fa6e24..e5b00cc 100644 (file)
@@ -61,9 +61,9 @@ struct kona_pwmc {
        struct clk *clk;
 };
 
-static inline struct kona_pwmc *to_kona_pwmc(struct pwm_chip *_chip)
+static inline struct kona_pwmc *to_kona_pwmc(struct pwm_chip *chip)
 {
-       return container_of(_chip, struct kona_pwmc, chip);
+       return container_of(chip, struct kona_pwmc, chip);
 }
 
 /*
index 0c5992a..0971c66 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
index 4703b4a..b9f063d 100644 (file)
@@ -34,9 +34,9 @@ struct crystalcove_pwm {
        struct regmap *regmap;
 };
 
-static inline struct crystalcove_pwm *to_crc_pwm(struct pwm_chip *pc)
+static inline struct crystalcove_pwm *to_crc_pwm(struct pwm_chip *chip)
 {
-       return container_of(pc, struct crystalcove_pwm, chip);
+       return container_of(chip, struct crystalcove_pwm, chip);
 }
 
 static int crc_pwm_calc_clk_div(int period_ns)
index 74e863a..baaac0c 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_data/cros_ec_commands.h>
 #include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
@@ -37,9 +38,9 @@ struct cros_ec_pwm {
        u16 duty_cycle;
 };
 
-static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *c)
+static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *chip)
 {
-       return container_of(c, struct cros_ec_pwm_device, chip);
+       return container_of(chip, struct cros_ec_pwm_device, chip);
 }
 
 static int cros_ec_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
@@ -218,14 +219,14 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static struct pwm_device *
-cros_ec_pwm_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
+cros_ec_pwm_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
 {
        struct pwm_device *pwm;
 
-       if (args->args[0] >= pc->npwm)
+       if (args->args[0] >= chip->npwm)
                return ERR_PTR(-EINVAL);
 
-       pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+       pwm = pwm_request_from_chip(chip, args->args[0], NULL);
        if (IS_ERR(pwm))
                return pwm;
 
index 5caadbd..b7c6045 100644 (file)
@@ -11,8 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/pwm.h>
index b95df1a..f7ba6fe 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/reset.h>
index 1f2eb1c..0651983 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
index 3b7067f..ef1293f 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/mfd/ingenic-tcu.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
index 35675e4..4b133a1 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <linux/mfd/lp3943.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
@@ -24,9 +25,9 @@ struct lp3943_pwm {
        struct lp3943_platform_data *pdata;
 };
 
-static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *_chip)
+static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *chip)
 {
-       return container_of(_chip, struct lp3943_pwm, chip);
+       return container_of(chip, struct lp3943_pwm, chip);
 }
 
 static struct lp3943_pwm_map *
index b9bf5b3..7a19a84 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
@@ -366,30 +367,21 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
        if (IS_ERR(lpc18xx_pwm->base))
                return PTR_ERR(lpc18xx_pwm->base);
 
-       lpc18xx_pwm->pwm_clk = devm_clk_get(&pdev->dev, "pwm");
+       lpc18xx_pwm->pwm_clk = devm_clk_get_enabled(&pdev->dev, "pwm");
        if (IS_ERR(lpc18xx_pwm->pwm_clk))
                return dev_err_probe(&pdev->dev, PTR_ERR(lpc18xx_pwm->pwm_clk),
                                     "failed to get pwm clock\n");
 
-       ret = clk_prepare_enable(lpc18xx_pwm->pwm_clk);
-       if (ret < 0)
-               return dev_err_probe(&pdev->dev, ret,
-                                    "could not prepare or enable pwm clock\n");
-
        lpc18xx_pwm->clk_rate = clk_get_rate(lpc18xx_pwm->pwm_clk);
-       if (!lpc18xx_pwm->clk_rate) {
-               ret = dev_err_probe(&pdev->dev,
-                                   -EINVAL, "pwm clock has no frequency\n");
-               goto disable_pwmclk;
-       }
+       if (!lpc18xx_pwm->clk_rate)
+               return dev_err_probe(&pdev->dev,
+                                    -EINVAL, "pwm clock has no frequency\n");
 
        /*
         * If clkrate is too fast, the calculations in .apply() might overflow.
         */
-       if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC) {
-               ret = dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n");
-               goto disable_pwmclk;
-       }
+       if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC)
+               return dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n");
 
        mutex_init(&lpc18xx_pwm->res_lock);
        mutex_init(&lpc18xx_pwm->period_lock);
@@ -435,18 +427,12 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
        lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL, val);
 
        ret = pwmchip_add(&lpc18xx_pwm->chip);
-       if (ret < 0) {
-               dev_err_probe(&pdev->dev, ret, "pwmchip_add failed\n");
-               goto disable_pwmclk;
-       }
+       if (ret < 0)
+               return dev_err_probe(&pdev->dev, ret, "pwmchip_add failed\n");
 
        platform_set_drvdata(pdev, lpc18xx_pwm);
 
        return 0;
-
-disable_pwmclk:
-       clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
-       return ret;
 }
 
 static void lpc18xx_pwm_remove(struct platform_device *pdev)
@@ -459,8 +445,6 @@ static void lpc18xx_pwm_remove(struct platform_device *pdev)
        val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL);
        lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL,
                           val | LPC18XX_PWM_CTRL_HALT);
-
-       clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
 }
 
 static struct platform_driver lpc18xx_pwm_driver = {
index 86a0ea0..806f0bb 100644 (file)
@@ -51,10 +51,10 @@ static int lpc32xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
        if (duty_cycles > 255)
                duty_cycles = 255;
 
-       val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+       val = readl(lpc32xx->base);
        val &= ~0xFFFF;
        val |= (period_cycles << 8) | duty_cycles;
-       writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+       writel(val, lpc32xx->base);
 
        return 0;
 }
@@ -69,9 +69,9 @@ static int lpc32xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
        if (ret)
                return ret;
 
-       val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+       val = readl(lpc32xx->base);
        val |= PWM_ENABLE;
-       writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+       writel(val, lpc32xx->base);
 
        return 0;
 }
@@ -81,9 +81,9 @@ static void lpc32xx_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
        struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip);
        u32 val;
 
-       val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+       val = readl(lpc32xx->base);
        val &= ~PWM_ENABLE;
-       writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+       writel(val, lpc32xx->base);
 
        clk_disable_unprepare(lpc32xx->clk);
 }
@@ -141,9 +141,9 @@ static int lpc32xx_pwm_probe(struct platform_device *pdev)
        lpc32xx->chip.npwm = 1;
 
        /* If PWM is disabled, configure the output to the default value */
-       val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
+       val = readl(lpc32xx->base);
        val &= ~PWM_PIN_LEVEL;
-       writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
+       writel(val, lpc32xx->base);
 
        ret = devm_pwmchip_add(&pdev->dev, &lpc32xx->chip);
        if (ret < 0) {
index 7a51d21..6adb0ed 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/clk.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
index 22f54db..25519cd 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
index 8750b57..e7525c9 100644 (file)
@@ -37,7 +37,7 @@
 #include <linux/math.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 
index 2401b67..a83bd6e 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
index ab63b08..7514ea3 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/types.h>
 
 struct ntxec_pwm {
-       struct device *dev;
        struct ntxec *ec;
        struct pwm_chip chip;
 };
@@ -141,14 +140,13 @@ static int ntxec_pwm_probe(struct platform_device *pdev)
        struct ntxec_pwm *priv;
        struct pwm_chip *chip;
 
-       pdev->dev.of_node = pdev->dev.parent->of_node;
+       device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
 
        priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
 
        priv->ec = ec;
-       priv->dev = &pdev->dev;
 
        chip = &priv->chip;
        chip->dev = &pdev->dev;
index 762429d..1e475ed 100644 (file)
@@ -15,6 +15,7 @@
  *   input clock (PWMCR_SD is set) and the output is driven to inactive.
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
@@ -156,13 +157,6 @@ MODULE_DEVICE_TABLE(of, pwm_of_match);
 #define pwm_of_match NULL
 #endif
 
-static const struct platform_device_id *pxa_pwm_get_id_dt(struct device *dev)
-{
-       const struct of_device_id *id = of_match_device(pwm_of_match, dev);
-
-       return id ? id->data : NULL;
-}
-
 static int pwm_probe(struct platform_device *pdev)
 {
        const struct platform_device_id *id = platform_get_device_id(pdev);
@@ -170,7 +164,7 @@ static int pwm_probe(struct platform_device *pdev)
        int ret = 0;
 
        if (IS_ENABLED(CONFIG_OF) && id == NULL)
-               id = pxa_pwm_get_id_dt(&pdev->dev);
+               id = of_device_get_match_data(&pdev->dev);
 
        if (id == NULL)
                return -EINVAL;
index c1a1f2d..03ee18f 100644 (file)
@@ -52,9 +52,9 @@ struct rockchip_pwm_data {
        u32 enable_conf;
 };
 
-static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c)
+static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *chip)
 {
-       return container_of(c, struct rockchip_pwm_chip, chip);
+       return container_of(chip, struct rockchip_pwm_chip, chip);
 }
 
 static int rockchip_pwm_get_state(struct pwm_chip *chip,
index bed8bd6..a56cecb 100644 (file)
@@ -40,7 +40,7 @@
  * struct rz_mtu3_channel_io_map - MTU3 pwm channel map
  *
  * @base_pwm_number: First PWM of a channel
- * @num: number of IOs on the HW channel.
+ * @num_channel_ios: number of IOs on the HW channel.
  */
 struct rz_mtu3_channel_io_map {
        u8 base_pwm_number;
index ae49d67..eabddb7 100644 (file)
@@ -13,6 +13,7 @@
  */
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
@@ -51,9 +52,9 @@ struct pwm_sifive_ddata {
 };
 
 static inline
-struct pwm_sifive_ddata *pwm_sifive_chip_to_ddata(struct pwm_chip *c)
+struct pwm_sifive_ddata *pwm_sifive_chip_to_ddata(struct pwm_chip *chip)
 {
-       return container_of(c, struct pwm_sifive_ddata, chip);
+       return container_of(chip, struct pwm_sifive_ddata, chip);
 }
 
 static int pwm_sifive_request(struct pwm_chip *chip, struct pwm_device *pwm)
index e64900a..9e42e3a 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/pwm.h>
 #include <linux/regmap.h>
 
        regmap_write((priv)->regmap, (priv)->offset + (reg), (val))
 
 struct sl28cpld_pwm {
-       struct pwm_chip pwm_chip;
+       struct pwm_chip chip;
        struct regmap *regmap;
        u32 offset;
 };
-#define sl28cpld_pwm_from_chip(_chip) \
-       container_of(_chip, struct sl28cpld_pwm, pwm_chip)
+
+static inline struct sl28cpld_pwm *sl28cpld_pwm_from_chip(struct pwm_chip *chip)
+{
+       return container_of(chip, struct sl28cpld_pwm, chip);
+}
 
 static int sl28cpld_pwm_get_state(struct pwm_chip *chip,
                                  struct pwm_device *pwm,
@@ -228,12 +232,12 @@ static int sl28cpld_pwm_probe(struct platform_device *pdev)
        }
 
        /* Initialize the pwm_chip structure */
-       chip = &priv->pwm_chip;
+       chip = &priv->chip;
        chip->dev = &pdev->dev;
        chip->ops = &sl28cpld_pwm_ops;
        chip->npwm = 1;
 
-       ret = devm_pwmchip_add(&pdev->dev, &priv->pwm_chip);
+       ret = devm_pwmchip_add(&pdev->dev, chip);
        if (ret) {
                dev_err(&pdev->dev, "failed to add PWM chip (%pe)",
                        ERR_PTR(ret));
index d43a6fa..1499c8c 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/math64.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
index 62e397a..3d6be77 100644 (file)
@@ -637,7 +637,7 @@ static int stm32_pwm_probe(struct platform_device *pdev)
        priv->chip.ops = &stm32pwm_ops;
        priv->chip.npwm = stm32_pwm_detect_channels(priv);
 
-       ret = pwmchip_add(&priv->chip);
+       ret = devm_pwmchip_add(dev, &priv->chip);
        if (ret < 0)
                return ret;
 
@@ -646,17 +646,6 @@ static int stm32_pwm_probe(struct platform_device *pdev)
        return 0;
 }
 
-static void stm32_pwm_remove(struct platform_device *pdev)
-{
-       struct stm32_pwm *priv = platform_get_drvdata(pdev);
-       unsigned int i;
-
-       for (i = 0; i < priv->chip.npwm; i++)
-               pwm_disable(&priv->chip.pwms[i]);
-
-       pwmchip_remove(&priv->chip);
-}
-
 static int __maybe_unused stm32_pwm_suspend(struct device *dev)
 {
        struct stm32_pwm *priv = dev_get_drvdata(dev);
@@ -701,7 +690,6 @@ MODULE_DEVICE_TABLE(of, stm32_pwm_of_match);
 
 static struct platform_driver stm32_pwm_driver = {
        .probe  = stm32_pwm_probe,
-       .remove_new = stm32_pwm_remove,
        .driver = {
                .name = "stm32-pwm",
                .of_match_table = stm32_pwm_of_match,
index 5d4a476..e205405 100644 (file)
@@ -61,8 +61,8 @@ static int stmpe_24xx_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
        return 0;
 }
 
-static void stmpe_24xx_pwm_disable(struct pwm_chip *chip,
-                                  struct pwm_device *pwm)
+static int stmpe_24xx_pwm_disable(struct pwm_chip *chip,
+                                 struct pwm_device *pwm)
 {
        struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip);
        u8 value;
@@ -72,17 +72,16 @@ static void stmpe_24xx_pwm_disable(struct pwm_chip *chip,
        if (ret < 0) {
                dev_err(chip->dev, "error reading PWM#%u control\n",
                        pwm->hwpwm);
-               return;
+               return ret;
        }
 
        value = ret & ~BIT(pwm->hwpwm);
 
        ret = stmpe_reg_write(stmpe_pwm->stmpe, STMPE24XX_PWMCS, value);
-       if (ret) {
+       if (ret)
                dev_err(chip->dev, "error writing PWM#%u control\n",
                        pwm->hwpwm);
-               return;
-       }
+       return ret;
 }
 
 /* STMPE 24xx PWM instructions */
@@ -111,7 +110,9 @@ static int stmpe_24xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 
        /* Make sure we are disabled */
        if (pwm_is_enabled(pwm)) {
-               stmpe_24xx_pwm_disable(chip, pwm);
+               ret = stmpe_24xx_pwm_disable(chip, pwm);
+               if (ret)
+                       return ret;
        } else {
                /* Connect the PWM to the pin */
                pin = pwm->hwpwm;
@@ -269,7 +270,7 @@ static int stmpe_24xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 
        if (!state->enabled) {
                if (pwm->state.enabled)
-                       stmpe_24xx_pwm_disable(chip, pwm);
+                       return stmpe_24xx_pwm_disable(chip, pwm);
 
                return 0;
        }
index a8790a8..c84fcf1 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/reset.h>
index d6ebe9f..7705c7b 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
index 5810abf..a169a34 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/pm_opp.h>
 #include <linux/pwm.h>
 #include <linux/platform_device.h>
index 1094499..8c94b26 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 /* ECAP registers and bits definitions */
 #define CAP1                   0x08
index bb3959a..ecbfd7e 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 /* EHRPWM registers and bits definitions */
 
index e3fb79b..7f7591a 100644 (file)
@@ -21,7 +21,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 
index d2c48fd..6d46db5 100644 (file)
@@ -6,6 +6,7 @@
  * Copyright (C) 2010 Alexey Charkov <alchark@gmail.com>
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 
 #include <asm/div64.h>
 
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
-
 /*
  * SoC architecture allocates register space for 4 PWMs but only
  * 2 are currently implemented.
index d022184..9b7c3d7 100644 (file)
@@ -119,7 +119,7 @@ static struct regulator_desc tps6287x_reg = {
        .ramp_mask = TPS6287X_CTRL1_VRAMP,
        .ramp_delay_table = tps6287x_ramp_table,
        .n_ramp_values = ARRAY_SIZE(tps6287x_ramp_table),
-       .n_voltages = 256,
+       .n_voltages = 256 * ARRAY_SIZE(tps6287x_voltage_ranges),
        .linear_ranges = tps6287x_voltage_ranges,
        .n_linear_ranges = ARRAY_SIZE(tps6287x_voltage_ranges),
        .linear_range_selectors_bitfield = tps6287x_voltage_range_sel,
index 25ef102..b7f0c87 100644 (file)
@@ -384,21 +384,19 @@ static int tps6594_request_reg_irqs(struct platform_device *pdev,
                if (irq < 0)
                        return -EINVAL;
 
-               irq_data[*irq_idx + j].dev = tps->dev;
-               irq_data[*irq_idx + j].type = irq_type;
-               irq_data[*irq_idx + j].rdev = rdev;
+               irq_data[*irq_idx].dev = tps->dev;
+               irq_data[*irq_idx].type = irq_type;
+               irq_data[*irq_idx].rdev = rdev;
 
                error = devm_request_threaded_irq(tps->dev, irq, NULL,
-                                                 tps6594_regulator_irq_handler,
-                                                 IRQF_ONESHOT,
-                                                 irq_type->irq_name,
-                                                 &irq_data[*irq_idx]);
-               (*irq_idx)++;
+                                                 tps6594_regulator_irq_handler, IRQF_ONESHOT,
+                                                 irq_type->irq_name, &irq_data[*irq_idx]);
                if (error) {
                        dev_err(tps->dev, "tps6594 failed to request %s IRQ %d: %d\n",
                                irq_type->irq_name, irq, error);
                        return error;
                }
+               (*irq_idx)++;
        }
        return 0;
 }
@@ -420,8 +418,8 @@ static int tps6594_regulator_probe(struct platform_device *pdev)
        int error, i, irq, multi, delta;
        int irq_idx = 0;
        int buck_idx = 0;
-       int ext_reg_irq_nb = 2;
-
+       size_t ext_reg_irq_nb = 2;
+       size_t reg_irq_nb;
        enum {
                MULTI_BUCK12,
                MULTI_BUCK123,
@@ -484,15 +482,16 @@ static int tps6594_regulator_probe(struct platform_device *pdev)
                }
        }
 
-       if (tps->chip_id == LP8764)
+       if (tps->chip_id == LP8764) {
                /* There is only 4 buck on LP8764 */
                buck_configured[4] = 1;
+               reg_irq_nb = size_mul(REGS_INT_NB, (BUCK_NB - 1));
+       } else {
+               reg_irq_nb = size_mul(REGS_INT_NB, (size_add(BUCK_NB, LDO_NB)));
+       }
 
-       irq_data = devm_kmalloc_array(tps->dev,
-                               REGS_INT_NB * sizeof(struct tps6594_regulator_irq_data),
-                               ARRAY_SIZE(tps6594_bucks_irq_types) +
-                               ARRAY_SIZE(tps6594_ldos_irq_types),
-                               GFP_KERNEL);
+       irq_data = devm_kmalloc_array(tps->dev, reg_irq_nb,
+                                     sizeof(struct tps6594_regulator_irq_data), GFP_KERNEL);
        if (!irq_data)
                return -ENOMEM;
 
index d95fa55..8fcda9b 100644 (file)
@@ -12,8 +12,7 @@
 #include <linux/mailbox_client.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
index f9874fc..8bb293b 100644 (file)
@@ -13,9 +13,9 @@
 #include <linux/mailbox_client.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_reserved_mem.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/regmap.h>
 #define IMX7D_M4_STOP                  (IMX7D_ENABLE_M4 | IMX7D_SW_M4C_RST | \
                                         IMX7D_SW_M4C_NON_SCLR_RST)
 
+#define IMX8M_M7_STOP                  (IMX7D_ENABLE_M4 | IMX7D_SW_M4C_RST)
+#define IMX8M_M7_POLL                  IMX7D_ENABLE_M4
+
+#define IMX8M_GPR22                    0x58
+#define IMX8M_GPR22_CM7_CPUWAIT                BIT(0)
+
 /* Address: 0x020D8000 */
 #define IMX6SX_SRC_SCR                 0x00
 #define IMX6SX_ENABLE_M4               BIT(22)
@@ -91,6 +97,7 @@ static int imx_rproc_detach_pd(struct rproc *rproc);
 struct imx_rproc {
        struct device                   *dev;
        struct regmap                   *regmap;
+       struct regmap                   *gpr;
        struct rproc                    *rproc;
        const struct imx_rproc_dcfg     *dcfg;
        struct imx_rproc_mem            mem[IMX_RPROC_MEM_MAX];
@@ -285,6 +292,18 @@ static const struct imx_rproc_att imx_rproc_att_imx6sx[] = {
        { 0x80000000, 0x80000000, 0x60000000, 0 },
 };
 
+static const struct imx_rproc_dcfg imx_rproc_cfg_imx8mn_mmio = {
+       .src_reg        = IMX7D_SRC_SCR,
+       .src_mask       = IMX7D_M4_RST_MASK,
+       .src_start      = IMX7D_M4_START,
+       .src_stop       = IMX8M_M7_STOP,
+       .gpr_reg        = IMX8M_GPR22,
+       .gpr_wait       = IMX8M_GPR22_CM7_CPUWAIT,
+       .att            = imx_rproc_att_imx8mn,
+       .att_size       = ARRAY_SIZE(imx_rproc_att_imx8mn),
+       .method         = IMX_RPROC_MMIO,
+};
+
 static const struct imx_rproc_dcfg imx_rproc_cfg_imx8mn = {
        .att            = imx_rproc_att_imx8mn,
        .att_size       = ARRAY_SIZE(imx_rproc_att_imx8mn),
@@ -365,8 +384,14 @@ static int imx_rproc_start(struct rproc *rproc)
 
        switch (dcfg->method) {
        case IMX_RPROC_MMIO:
-               ret = regmap_update_bits(priv->regmap, dcfg->src_reg, dcfg->src_mask,
-                                        dcfg->src_start);
+               if (priv->gpr) {
+                       ret = regmap_clear_bits(priv->gpr, dcfg->gpr_reg,
+                                               dcfg->gpr_wait);
+               } else {
+                       ret = regmap_update_bits(priv->regmap, dcfg->src_reg,
+                                                dcfg->src_mask,
+                                                dcfg->src_start);
+               }
                break;
        case IMX_RPROC_SMC:
                arm_smccc_smc(IMX_SIP_RPROC, IMX_SIP_RPROC_START, 0, 0, 0, 0, 0, 0, &res);
@@ -395,6 +420,16 @@ static int imx_rproc_stop(struct rproc *rproc)
 
        switch (dcfg->method) {
        case IMX_RPROC_MMIO:
+               if (priv->gpr) {
+                       ret = regmap_set_bits(priv->gpr, dcfg->gpr_reg,
+                                             dcfg->gpr_wait);
+                       if (ret) {
+                               dev_err(priv->dev,
+                                       "Failed to quiescence M4 platform!\n");
+                               return ret;
+                       }
+               }
+
                ret = regmap_update_bits(priv->regmap, dcfg->src_reg, dcfg->src_mask,
                                         dcfg->src_stop);
                break;
@@ -725,13 +760,22 @@ static int imx_rproc_addr_init(struct imx_rproc *priv,
        return 0;
 }
 
+static int imx_rproc_notified_idr_cb(int id, void *ptr, void *data)
+{
+       struct rproc *rproc = data;
+
+       rproc_vq_interrupt(rproc, id);
+
+       return 0;
+}
+
 static void imx_rproc_vq_work(struct work_struct *work)
 {
        struct imx_rproc *priv = container_of(work, struct imx_rproc,
                                              rproc_work);
+       struct rproc *rproc = priv->rproc;
 
-       rproc_vq_interrupt(priv->rproc, 0);
-       rproc_vq_interrupt(priv->rproc, 1);
+       idr_for_each(&rproc->notifyids, imx_rproc_notified_idr_cb, rproc);
 }
 
 static void imx_rproc_rx_callback(struct mbox_client *cl, void *msg)
@@ -983,6 +1027,10 @@ static int imx_rproc_detect_mode(struct imx_rproc *priv)
                break;
        }
 
+       priv->gpr = syscon_regmap_lookup_by_phandle(dev->of_node, "fsl,iomuxc-gpr");
+       if (IS_ERR(priv->gpr))
+               priv->gpr = NULL;
+
        regmap = syscon_regmap_lookup_by_phandle(dev->of_node, "syscon");
        if (IS_ERR(regmap)) {
                dev_err(dev, "failed to find syscon\n");
@@ -992,6 +1040,19 @@ static int imx_rproc_detect_mode(struct imx_rproc *priv)
        priv->regmap = regmap;
        regmap_attach_dev(dev, regmap, &config);
 
+       if (priv->gpr) {
+               ret = regmap_read(priv->gpr, dcfg->gpr_reg, &val);
+               if (val & dcfg->gpr_wait) {
+                       /*
+                        * After cold boot, the CM indicates its in wait
+                        * state, but not fully powered off. Power it off
+                        * fully so firmware can be loaded into it.
+                        */
+                       imx_rproc_stop(priv->rproc);
+                       return 0;
+               }
+       }
+
        ret = regmap_read(regmap, dcfg->src_reg, &val);
        if (ret) {
                dev_err(dev, "Failed to read src\n");
@@ -1133,6 +1194,8 @@ static const struct of_device_id imx_rproc_of_match[] = {
        { .compatible = "fsl,imx8mm-cm4", .data = &imx_rproc_cfg_imx8mq },
        { .compatible = "fsl,imx8mn-cm7", .data = &imx_rproc_cfg_imx8mn },
        { .compatible = "fsl,imx8mp-cm7", .data = &imx_rproc_cfg_imx8mn },
+       { .compatible = "fsl,imx8mn-cm7-mmio", .data = &imx_rproc_cfg_imx8mn_mmio },
+       { .compatible = "fsl,imx8mp-cm7-mmio", .data = &imx_rproc_cfg_imx8mn_mmio },
        { .compatible = "fsl,imx8qxp-cm4", .data = &imx_rproc_cfg_imx8qxp },
        { .compatible = "fsl,imx8qm-cm4", .data = &imx_rproc_cfg_imx8qm },
        { .compatible = "fsl,imx8ulp-cm33", .data = &imx_rproc_cfg_imx8ulp },
index 1c7e212..79a1b89 100644 (file)
@@ -31,6 +31,8 @@ struct imx_rproc_dcfg {
        u32                             src_mask;
        u32                             src_start;
        u32                             src_stop;
+       u32                             gpr_reg;
+       u32                             gpr_wait;
        const struct imx_rproc_att      *att;
        size_t                          att_size;
        enum imx_rproc_method           method;
index 82ed90f..8f50ab8 100644 (file)
@@ -19,7 +19,8 @@
 #include <linux/clk/ti.h>
 #include <linux/err.h>
 #include <linux/io.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
index 2874c8d..327f0c7 100644 (file)
@@ -16,8 +16,9 @@
 #include <linux/debugfs.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/remoteproc/pruss.h>
 #include <linux/pruss_driver.h>
 #include <linux/remoteproc.h>
@@ -109,6 +110,7 @@ struct pru_private_data {
  * @dbg_single_step: debug state variable to set PRU into single step mode
  * @dbg_continuous: debug state variable to restore PRU execution mode
  * @evt_count: number of mapped events
+ * @gpmux_save: saved value for gpmux config
  */
 struct pru_rproc {
        int id;
@@ -127,6 +129,7 @@ struct pru_rproc {
        u32 dbg_single_step;
        u32 dbg_continuous;
        u8 evt_count;
+       u8 gpmux_save;
 };
 
 static inline u32 pru_control_read_reg(struct pru_rproc *pru, unsigned int reg)
@@ -228,6 +231,7 @@ struct rproc *pru_rproc_get(struct device_node *np, int index,
        struct device *dev;
        const char *fw_name;
        int ret;
+       u32 mux;
 
        rproc = __pru_rproc_get(np, index);
        if (IS_ERR(rproc))
@@ -252,6 +256,23 @@ struct rproc *pru_rproc_get(struct device_node *np, int index,
        if (pru_id)
                *pru_id = pru->id;
 
+       ret = pruss_cfg_get_gpmux(pru->pruss, pru->id, &pru->gpmux_save);
+       if (ret) {
+               dev_err(dev, "failed to get cfg gpmux: %d\n", ret);
+               goto err;
+       }
+
+       /* An error here is acceptable for backward compatibility */
+       ret = of_property_read_u32_index(np, "ti,pruss-gp-mux-sel", index,
+                                        &mux);
+       if (!ret) {
+               ret = pruss_cfg_set_gpmux(pru->pruss, pru->id, mux);
+               if (ret) {
+                       dev_err(dev, "failed to set cfg gpmux: %d\n", ret);
+                       goto err;
+               }
+       }
+
        ret = of_property_read_string_index(np, "firmware-name", index,
                                            &fw_name);
        if (!ret) {
@@ -290,6 +311,8 @@ void pru_rproc_put(struct rproc *rproc)
 
        pru = rproc->priv;
 
+       pruss_cfg_set_gpmux(pru->pruss, pru->id, pru->gpmux_save);
+
        pru_rproc_set_firmware(rproc, NULL);
 
        mutex_lock(&pru->lock);
index a0d4238..03e5f5d 100644 (file)
@@ -29,9 +29,9 @@
 #define MAX_NUM_OF_SS           10
 #define MAX_REGION_NAME_LENGTH  16
 #define SBL_MINIDUMP_SMEM_ID   602
-#define MD_REGION_VALID                ('V' << 24 | 'A' << 16 | 'L' << 8 | 'I' << 0)
-#define MD_SS_ENCR_DONE                ('D' << 24 | 'O' << 16 | 'N' << 8 | 'E' << 0)
-#define MD_SS_ENABLED          ('E' << 24 | 'N' << 16 | 'B' << 8 | 'L' << 0)
+#define MINIDUMP_REGION_VALID          ('V' << 24 | 'A' << 16 | 'L' << 8 | 'I' << 0)
+#define MINIDUMP_SS_ENCR_DONE          ('D' << 24 | 'O' << 16 | 'N' << 8 | 'E' << 0)
+#define MINIDUMP_SS_ENABLED            ('E' << 24 | 'N' << 16 | 'B' << 8 | 'L' << 0)
 
 /**
  * struct minidump_region - Minidump region
@@ -125,7 +125,7 @@ static int qcom_add_minidump_segments(struct rproc *rproc, struct minidump_subsy
 
        for (i = 0; i < seg_cnt; i++) {
                memcpy_fromio(&region, ptr + i, sizeof(region));
-               if (le32_to_cpu(region.valid) == MD_REGION_VALID) {
+               if (le32_to_cpu(region.valid) == MINIDUMP_REGION_VALID) {
                        name = kstrndup(region.name, MAX_REGION_NAME_LENGTH - 1, GFP_KERNEL);
                        if (!name) {
                                iounmap(ptr);
@@ -168,12 +168,21 @@ void qcom_minidump(struct rproc *rproc, unsigned int minidump_id,
         */
        if (subsystem->regions_baseptr == 0 ||
            le32_to_cpu(subsystem->status) != 1 ||
-           le32_to_cpu(subsystem->enabled) != MD_SS_ENABLED ||
-           le32_to_cpu(subsystem->encryption_status) != MD_SS_ENCR_DONE) {
+           le32_to_cpu(subsystem->enabled) != MINIDUMP_SS_ENABLED) {
+               return rproc_coredump(rproc);
+       }
+
+       if (le32_to_cpu(subsystem->encryption_status) != MINIDUMP_SS_ENCR_DONE) {
                dev_err(&rproc->dev, "Minidump not ready, skipping\n");
                return;
        }
 
+       /**
+        * Clear out the dump segments populated by parse_fw before
+        * re-populating them with minidump segments.
+        */
+       rproc_coredump_cleanup(rproc);
+
        ret = qcom_add_minidump_segments(rproc, subsystem, rproc_dumpfn_t);
        if (ret) {
                dev_err(&rproc->dev, "Failed with error: %d while adding minidump entries\n", ret);
index 6777a3b..6c67514 100644 (file)
@@ -14,8 +14,8 @@
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
@@ -637,28 +637,26 @@ static int adsp_init_mmio(struct qcom_adsp *adsp,
 
 static int adsp_alloc_memory_region(struct qcom_adsp *adsp)
 {
+       struct reserved_mem *rmem = NULL;
        struct device_node *node;
-       struct resource r;
-       int ret;
 
        node = of_parse_phandle(adsp->dev->of_node, "memory-region", 0);
-       if (!node) {
-               dev_err(adsp->dev, "no memory-region specified\n");
+       if (node)
+               rmem = of_reserved_mem_lookup(node);
+       of_node_put(node);
+
+       if (!rmem) {
+               dev_err(adsp->dev, "unable to resolve memory-region\n");
                return -EINVAL;
        }
 
-       ret = of_address_to_resource(node, 0, &r);
-       of_node_put(node);
-       if (ret)
-               return ret;
-
-       adsp->mem_phys = adsp->mem_reloc = r.start;
-       adsp->mem_size = resource_size(&r);
+       adsp->mem_phys = adsp->mem_reloc = rmem->base;
+       adsp->mem_size = rmem->size;
        adsp->mem_region = devm_ioremap_wc(adsp->dev,
                                adsp->mem_phys, adsp->mem_size);
        if (!adsp->mem_region) {
                dev_err(adsp->dev, "unable to map memory region: %pa+%zx\n",
-                       &r.start, adsp->mem_size);
+                       &rmem->base, adsp->mem_size);
                return -EBUSY;
        }
 
index 70bffc9..22fe7b5 100644 (file)
@@ -15,9 +15,9 @@
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_reserved_mem.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
@@ -71,6 +71,7 @@
 #define QDSP6SS_MEM_PWR_CTL            0x0B0
 #define QDSP6V6SS_MEM_PWR_CTL          0x034
 #define QDSP6SS_STRAP_ACC              0x110
+#define QDSP6V62SS_BHS_STATUS          0x0C4
 
 /* AXI Halt Register Offsets */
 #define AXI_HALTREQ_REG                        0x0
 #define QDSP6v56_CLAMP_QMC_MEM         BIT(22)
 #define QDSP6SS_XO_CBCR                0x0038
 #define QDSP6SS_ACC_OVERRIDE_VAL               0x20
+#define QDSP6v55_BHS_EN_REST_ACK       BIT(0)
 
 /* QDSP6v65 parameters */
 #define QDSP6SS_CORE_CBCR              0x20
 #define QDSP6SS_BOOT_CORE_START         0x400
 #define QDSP6SS_BOOT_CMD                0x404
 #define BOOT_FSM_TIMEOUT                10000
+#define BHS_CHECK_MAX_LOOPS             200
 
 struct reg_info {
        struct regulator *reg;
@@ -250,6 +253,7 @@ enum {
        MSS_MSM8998,
        MSS_SC7180,
        MSS_SC7280,
+       MSS_SDM660,
        MSS_SDM845,
 };
 
@@ -700,7 +704,8 @@ static int q6v5proc_reset(struct q6v5 *qproc)
        } else if (qproc->version == MSS_MSM8909 ||
                   qproc->version == MSS_MSM8953 ||
                   qproc->version == MSS_MSM8996 ||
-                  qproc->version == MSS_MSM8998) {
+                  qproc->version == MSS_MSM8998 ||
+                  qproc->version == MSS_SDM660) {
 
                if (qproc->version != MSS_MSM8909 &&
                    qproc->version != MSS_MSM8953)
@@ -734,6 +739,16 @@ static int q6v5proc_reset(struct q6v5 *qproc)
                val |= readl(qproc->reg_base + QDSP6SS_PWR_CTL_REG);
                udelay(1);
 
+               if (qproc->version == MSS_SDM660) {
+                       ret = readl_relaxed_poll_timeout(qproc->reg_base + QDSP6V62SS_BHS_STATUS,
+                                                        i, (i & QDSP6v55_BHS_EN_REST_ACK),
+                                                        1, BHS_CHECK_MAX_LOOPS);
+                       if (ret == -ETIMEDOUT) {
+                               dev_err(qproc->dev, "BHS_EN_REST_ACK not set!\n");
+                               return -ETIMEDOUT;
+                       }
+               }
+
                /* Put LDO in bypass mode */
                val |= QDSP6v56_LDO_BYP;
                writel(val, qproc->reg_base + QDSP6SS_PWR_CTL_REG);
@@ -756,7 +771,7 @@ static int q6v5proc_reset(struct q6v5 *qproc)
                                mem_pwr_ctl = QDSP6SS_MEM_PWR_CTL;
                                i = 19;
                        } else {
-                               /* MSS_MSM8998 */
+                               /* MSS_MSM8998, MSS_SDM660 */
                                mem_pwr_ctl = QDSP6V6SS_MEM_PWR_CTL;
                                i = 28;
                        }
@@ -1875,8 +1890,6 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
        struct device_node *child;
        struct reserved_mem *rmem;
        struct device_node *node;
-       struct resource r;
-       int ret;
 
        /*
         * In the absence of mba/mpss sub-child, extract the mba and mpss
@@ -1891,15 +1904,20 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
                of_node_put(child);
        }
 
-       ret = of_address_to_resource(node, 0, &r);
+       if (!node) {
+               dev_err(qproc->dev, "no mba memory-region specified\n");
+               return -EINVAL;
+       }
+
+       rmem = of_reserved_mem_lookup(node);
        of_node_put(node);
-       if (ret) {
+       if (!rmem) {
                dev_err(qproc->dev, "unable to resolve mba region\n");
-               return ret;
+               return -EINVAL;
        }
 
-       qproc->mba_phys = r.start;
-       qproc->mba_size = resource_size(&r);
+       qproc->mba_phys = rmem->base;
+       qproc->mba_size = rmem->size;
 
        if (!child) {
                node = of_parse_phandle(qproc->dev->of_node,
@@ -1910,15 +1928,20 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
                of_node_put(child);
        }
 
-       ret = of_address_to_resource(node, 0, &r);
+       if (!node) {
+               dev_err(qproc->dev, "no mpss memory-region specified\n");
+               return -EINVAL;
+       }
+
+       rmem = of_reserved_mem_lookup(node);
        of_node_put(node);
-       if (ret) {
+       if (!rmem) {
                dev_err(qproc->dev, "unable to resolve mpss region\n");
-               return ret;
+               return -EINVAL;
        }
 
-       qproc->mpss_phys = qproc->mpss_reloc = r.start;
-       qproc->mpss_size = resource_size(&r);
+       qproc->mpss_phys = qproc->mpss_reloc = rmem->base;
+       qproc->mpss_size = rmem->size;
 
        if (!child) {
                node = of_parse_phandle(qproc->dev->of_node, "memory-region", 2);
@@ -2191,6 +2214,37 @@ static const struct rproc_hexagon_res sc7280_mss = {
        .version = MSS_SC7280,
 };
 
+static const struct rproc_hexagon_res sdm660_mss = {
+       .hexagon_mba_image = "mba.mbn",
+       .proxy_clk_names = (char*[]){
+                       "xo",
+                       "qdss",
+                       "mem",
+                       NULL
+       },
+       .active_clk_names = (char*[]){
+                       "iface",
+                       "bus",
+                       "gpll0_mss",
+                       "mnoc_axi",
+                       "snoc_axi",
+                       NULL
+       },
+       .proxy_pd_names = (char*[]){
+                       "cx",
+                       "mx",
+                       NULL
+       },
+       .need_mem_protection = true,
+       .has_alt_reset = false,
+       .has_mba_logs = false,
+       .has_spare_reg = false,
+       .has_qaccept_regs = false,
+       .has_ext_cntl_regs = false,
+       .has_vq6 = false,
+       .version = MSS_SDM660,
+};
+
 static const struct rproc_hexagon_res sdm845_mss = {
        .hexagon_mba_image = "mba.mbn",
        .proxy_clk_names = (char*[]){
@@ -2473,6 +2527,7 @@ static const struct of_device_id q6v5_of_match[] = {
        { .compatible = "qcom,msm8998-mss-pil", .data = &msm8998_mss},
        { .compatible = "qcom,sc7180-mss-pil", .data = &sc7180_mss},
        { .compatible = "qcom,sc7280-mss-pil", .data = &sc7280_mss},
+       { .compatible = "qcom,sdm660-mss-pil", .data = &sdm660_mss},
        { .compatible = "qcom,sdm845-mss-pil", .data = &sdm845_mss},
        { },
 };
index 3153d82..b5447dd 100644 (file)
@@ -13,8 +13,9 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
@@ -425,6 +426,7 @@ static const struct rproc_ops adsp_minidump_ops = {
        .start = adsp_start,
        .stop = adsp_stop,
        .da_to_va = adsp_da_to_va,
+       .parse_fw = qcom_register_dump_segments,
        .load = adsp_load,
        .panic = adsp_panic,
        .coredump = adsp_minidump,
@@ -533,9 +535,8 @@ static void adsp_pds_detach(struct qcom_adsp *adsp, struct device **pds,
 
 static int adsp_alloc_memory_region(struct qcom_adsp *adsp)
 {
+       struct reserved_mem *rmem;
        struct device_node *node;
-       struct resource r;
-       int ret;
 
        node = of_parse_phandle(adsp->dev->of_node, "memory-region", 0);
        if (!node) {
@@ -543,17 +544,19 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp)
                return -EINVAL;
        }
 
-       ret = of_address_to_resource(node, 0, &r);
+       rmem = of_reserved_mem_lookup(node);
        of_node_put(node);
-       if (ret)
-               return ret;
+       if (!rmem) {
+               dev_err(adsp->dev, "unable to resolve memory-region\n");
+               return -EINVAL;
+       }
 
-       adsp->mem_phys = adsp->mem_reloc = r.start;
-       adsp->mem_size = resource_size(&r);
+       adsp->mem_phys = adsp->mem_reloc = rmem->base;
+       adsp->mem_size = rmem->size;
        adsp->mem_region = devm_ioremap_wc(adsp->dev, adsp->mem_phys, adsp->mem_size);
        if (!adsp->mem_region) {
                dev_err(adsp->dev, "unable to map memory region: %pa+%zx\n",
-                       &r.start, adsp->mem_size);
+                       &rmem->base, adsp->mem_size);
                return -EBUSY;
        }
 
@@ -566,16 +569,19 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp)
                return -EINVAL;
        }
 
-       ret = of_address_to_resource(node, 0, &r);
-       if (ret)
-               return ret;
+       rmem = of_reserved_mem_lookup(node);
+       of_node_put(node);
+       if (!rmem) {
+               dev_err(adsp->dev, "unable to resolve dtb memory-region\n");
+               return -EINVAL;
+       }
 
-       adsp->dtb_mem_phys = adsp->dtb_mem_reloc = r.start;
-       adsp->dtb_mem_size = resource_size(&r);
+       adsp->dtb_mem_phys = adsp->dtb_mem_reloc = rmem->base;
+       adsp->dtb_mem_size = rmem->size;
        adsp->dtb_mem_region = devm_ioremap_wc(adsp->dev, adsp->dtb_mem_phys, adsp->dtb_mem_size);
        if (!adsp->dtb_mem_region) {
                dev_err(adsp->dev, "unable to map dtb memory region: %pa+%zx\n",
-                       &r.start, adsp->dtb_mem_size);
+                       &rmem->base, adsp->dtb_mem_size);
                return -EBUSY;
        }
 
@@ -584,29 +590,28 @@ static int adsp_alloc_memory_region(struct qcom_adsp *adsp)
 
 static int adsp_assign_memory_region(struct qcom_adsp *adsp)
 {
+       struct reserved_mem *rmem = NULL;
        struct qcom_scm_vmperm perm;
        struct device_node *node;
-       struct resource r;
        int ret;
 
        if (!adsp->region_assign_idx)
                return 0;
 
        node = of_parse_phandle(adsp->dev->of_node, "memory-region", adsp->region_assign_idx);
-       if (!node) {
-               dev_err(adsp->dev, "missing shareable memory-region\n");
+       if (node)
+               rmem = of_reserved_mem_lookup(node);
+       of_node_put(node);
+       if (!rmem) {
+               dev_err(adsp->dev, "unable to resolve shareable memory-region\n");
                return -EINVAL;
        }
 
-       ret = of_address_to_resource(node, 0, &r);
-       if (ret)
-               return ret;
-
        perm.vmid = QCOM_SCM_VMID_MSS_MSA;
        perm.perm = QCOM_SCM_PERM_RW;
 
-       adsp->region_assign_phys = r.start;
-       adsp->region_assign_size = resource_size(&r);
+       adsp->region_assign_phys = rmem->base;
+       adsp->region_assign_size = rmem->size;
        adsp->region_assign_perms = BIT(QCOM_SCM_VMID_HLOS);
 
        ret = qcom_scm_assign_mem(adsp->region_assign_phys,
@@ -1012,7 +1017,7 @@ static const struct adsp_data sc8180x_mpss_resource = {
        .ssctl_id = 0x12,
 };
 
-static const struct adsp_data slpi_resource_init = {
+static const struct adsp_data msm8996_slpi_resource_init = {
                .crash_reason_smem = 424,
                .firmware_name = "slpi.mdt",
                .pas_id = 12,
@@ -1026,7 +1031,7 @@ static const struct adsp_data slpi_resource_init = {
                .ssctl_id = 0x16,
 };
 
-static const struct adsp_data sm8150_slpi_resource = {
+static const struct adsp_data sdm845_slpi_resource_init = {
                .crash_reason_smem = 424,
                .firmware_name = "slpi.mdt",
                .pas_id = 12,
@@ -1042,38 +1047,6 @@ static const struct adsp_data sm8150_slpi_resource = {
                .ssctl_id = 0x16,
 };
 
-static const struct adsp_data sm8250_slpi_resource = {
-       .crash_reason_smem = 424,
-       .firmware_name = "slpi.mdt",
-       .pas_id = 12,
-       .auto_boot = true,
-       .proxy_pd_names = (char*[]){
-               "lcx",
-               "lmx",
-               NULL
-       },
-       .load_state = "slpi",
-       .ssr_name = "dsps",
-       .sysmon_name = "slpi",
-       .ssctl_id = 0x16,
-};
-
-static const struct adsp_data sm8350_slpi_resource = {
-       .crash_reason_smem = 424,
-       .firmware_name = "slpi.mdt",
-       .pas_id = 12,
-       .auto_boot = true,
-       .proxy_pd_names = (char*[]){
-               "lcx",
-               "lmx",
-               NULL
-       },
-       .load_state = "slpi",
-       .ssr_name = "dsps",
-       .sysmon_name = "slpi",
-       .ssctl_id = 0x16,
-};
-
 static const struct adsp_data wcss_resource_init = {
        .crash_reason_smem = 421,
        .firmware_name = "wcnss.mdt",
@@ -1182,9 +1155,9 @@ static const struct of_device_id adsp_of_match[] = {
        { .compatible = "qcom,msm8953-adsp-pil", .data = &msm8996_adsp_resource},
        { .compatible = "qcom,msm8974-adsp-pil", .data = &adsp_resource_init},
        { .compatible = "qcom,msm8996-adsp-pil", .data = &msm8996_adsp_resource},
-       { .compatible = "qcom,msm8996-slpi-pil", .data = &slpi_resource_init},
+       { .compatible = "qcom,msm8996-slpi-pil", .data = &msm8996_slpi_resource_init},
        { .compatible = "qcom,msm8998-adsp-pas", .data = &msm8996_adsp_resource},
-       { .compatible = "qcom,msm8998-slpi-pas", .data = &slpi_resource_init},
+       { .compatible = "qcom,msm8998-slpi-pas", .data = &msm8996_slpi_resource_init},
        { .compatible = "qcom,qcs404-adsp-pas", .data = &adsp_resource_init },
        { .compatible = "qcom,qcs404-cdsp-pas", .data = &cdsp_resource_init },
        { .compatible = "qcom,qcs404-wcss-pas", .data = &wcss_resource_init },
@@ -1199,6 +1172,7 @@ static const struct of_device_id adsp_of_match[] = {
        { .compatible = "qcom,sdm660-adsp-pas", .data = &adsp_resource_init},
        { .compatible = "qcom,sdm845-adsp-pas", .data = &sdm845_adsp_resource_init},
        { .compatible = "qcom,sdm845-cdsp-pas", .data = &sdm845_cdsp_resource_init},
+       { .compatible = "qcom,sdm845-slpi-pas", .data = &sdm845_slpi_resource_init},
        { .compatible = "qcom,sdx55-mpss-pas", .data = &sdx55_mpss_resource},
        { .compatible = "qcom,sm6115-adsp-pas", .data = &adsp_resource_init},
        { .compatible = "qcom,sm6115-cdsp-pas", .data = &cdsp_resource_init},
@@ -1209,17 +1183,17 @@ static const struct of_device_id adsp_of_match[] = {
        { .compatible = "qcom,sm8150-adsp-pas", .data = &sm8150_adsp_resource},
        { .compatible = "qcom,sm8150-cdsp-pas", .data = &sm8150_cdsp_resource},
        { .compatible = "qcom,sm8150-mpss-pas", .data = &mpss_resource_init},
-       { .compatible = "qcom,sm8150-slpi-pas", .data = &sm8150_slpi_resource},
+       { .compatible = "qcom,sm8150-slpi-pas", .data = &sdm845_slpi_resource_init},
        { .compatible = "qcom,sm8250-adsp-pas", .data = &sm8250_adsp_resource},
        { .compatible = "qcom,sm8250-cdsp-pas", .data = &sm8250_cdsp_resource},
-       { .compatible = "qcom,sm8250-slpi-pas", .data = &sm8250_slpi_resource},
+       { .compatible = "qcom,sm8250-slpi-pas", .data = &sdm845_slpi_resource_init},
        { .compatible = "qcom,sm8350-adsp-pas", .data = &sm8350_adsp_resource},
        { .compatible = "qcom,sm8350-cdsp-pas", .data = &sm8350_cdsp_resource},
-       { .compatible = "qcom,sm8350-slpi-pas", .data = &sm8350_slpi_resource},
+       { .compatible = "qcom,sm8350-slpi-pas", .data = &sdm845_slpi_resource_init},
        { .compatible = "qcom,sm8350-mpss-pas", .data = &mpss_resource_init},
        { .compatible = "qcom,sm8450-adsp-pas", .data = &sm8350_adsp_resource},
        { .compatible = "qcom,sm8450-cdsp-pas", .data = &sm8350_cdsp_resource},
-       { .compatible = "qcom,sm8450-slpi-pas", .data = &sm8350_slpi_resource},
+       { .compatible = "qcom,sm8450-slpi-pas", .data = &sdm845_slpi_resource_init},
        { .compatible = "qcom,sm8450-mpss-pas", .data = &sm8450_mpss_resource},
        { .compatible = "qcom,sm8550-adsp-pas", .data = &sm8550_adsp_resource},
        { .compatible = "qcom,sm8550-cdsp-pas", .data = &sm8550_cdsp_resource},
index b437044..cff1fa0 100644 (file)
@@ -837,8 +837,7 @@ static int q6v5_wcss_init_mmio(struct q6v5_wcss *wcss,
                return -ENOMEM;
 
        if (wcss->version == WCSS_IPQ8074) {
-               res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rmb");
-               wcss->rmb_base = devm_ioremap_resource(&pdev->dev, res);
+               wcss->rmb_base = devm_platform_ioremap_resource_byname(pdev, "rmb");
                if (IS_ERR(wcss->rmb_base))
                        return PTR_ERR(wcss->rmb_base);
        }
index 746f56b..c24e4a8 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/remoteproc/qcom_rproc.h>
 #include <linux/rpmsg.h>
index 1ed0647..90de22c 100644 (file)
@@ -14,8 +14,8 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/io.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
@@ -506,27 +506,25 @@ static int wcnss_request_irq(struct qcom_wcnss *wcnss,
 
 static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss)
 {
+       struct reserved_mem *rmem = NULL;
        struct device_node *node;
-       struct resource r;
-       int ret;
 
        node = of_parse_phandle(wcnss->dev->of_node, "memory-region", 0);
-       if (!node) {
-               dev_err(wcnss->dev, "no memory-region specified\n");
+       if (node)
+               rmem = of_reserved_mem_lookup(node);
+       of_node_put(node);
+
+       if (!rmem) {
+               dev_err(wcnss->dev, "unable to resolve memory-region\n");
                return -EINVAL;
        }
 
-       ret = of_address_to_resource(node, 0, &r);
-       of_node_put(node);
-       if (ret)
-               return ret;
-
-       wcnss->mem_phys = wcnss->mem_reloc = r.start;
-       wcnss->mem_size = resource_size(&r);
+       wcnss->mem_phys = wcnss->mem_reloc = rmem->base;
+       wcnss->mem_size = rmem->size;
        wcnss->mem_region = devm_ioremap_wc(wcnss->dev, wcnss->mem_phys, wcnss->mem_size);
        if (!wcnss->mem_region) {
                dev_err(wcnss->dev, "unable to map memory region: %pa+%zx\n",
-                       &r.start, wcnss->mem_size);
+                       &rmem->base, wcnss->mem_size);
                return -EBUSY;
        }
 
@@ -538,7 +536,6 @@ static int wcnss_probe(struct platform_device *pdev)
        const char *fw_name = WCNSS_FIRMWARE_NAME;
        const struct wcnss_data *data;
        struct qcom_wcnss *wcnss;
-       struct resource *res;
        struct rproc *rproc;
        void __iomem *mmio;
        int ret;
@@ -576,8 +573,7 @@ static int wcnss_probe(struct platform_device *pdev)
 
        mutex_init(&wcnss->iris_lock);
 
-       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pmu");
-       mmio = devm_ioremap_resource(&pdev->dev, res);
+       mmio = devm_platform_ioremap_resource_byname(pdev, "pmu");
        if (IS_ERR(mmio)) {
                ret = PTR_ERR(mmio);
                goto free_rproc;
index 09720dd..dd36fd0 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/clk.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
index 90e8769..cc17e84 100644 (file)
@@ -5,8 +5,9 @@
 
 #include <linux/limits.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/remoteproc.h>
 #include <linux/reset.h>
index bc0e160..6ede8c0 100644 (file)
@@ -32,6 +32,7 @@ void rproc_coredump_cleanup(struct rproc *rproc)
                kfree(entry);
        }
 }
+EXPORT_SYMBOL_GPL(rproc_coredump_cleanup);
 
 /**
  * rproc_coredump_add_segment() - add segment of device memory to coredump
@@ -327,6 +328,7 @@ void rproc_coredump(struct rproc *rproc)
         */
        wait_for_completion(&dump_state.dump_done);
 }
+EXPORT_SYMBOL_GPL(rproc_coredump);
 
 /**
  * rproc_coredump_using_sections() - perform coredump using section headers
index d4dbb8d..f62a82d 100644 (file)
@@ -76,10 +76,6 @@ extern struct class rproc_class;
 int rproc_init_sysfs(void);
 void rproc_exit_sysfs(void);
 
-/* from remoteproc_coredump.c */
-void rproc_coredump_cleanup(struct rproc *rproc);
-void rproc_coredump(struct rproc *rproc);
-
 #ifdef CONFIG_REMOTEPROC_CDEV
 void rproc_init_cdev(void);
 void rproc_exit_cdev(void);
index 4ed9467..d177193 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/remoteproc.h>
 #include <linux/remoteproc/st_slim_rproc.h>
index cf073ba..9d9b135 100644 (file)
@@ -12,9 +12,9 @@
 #include <linux/mailbox_client.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/regmap.h>
 #include <linux/remoteproc.h>
@@ -921,7 +921,7 @@ static void stm32_rproc_remove(struct platform_device *pdev)
        rproc_free(rproc);
 }
 
-static int __maybe_unused stm32_rproc_suspend(struct device *dev)
+static int stm32_rproc_suspend(struct device *dev)
 {
        struct rproc *rproc = dev_get_drvdata(dev);
        struct stm32_rproc *ddata = rproc->priv;
@@ -932,7 +932,7 @@ static int __maybe_unused stm32_rproc_suspend(struct device *dev)
        return 0;
 }
 
-static int __maybe_unused stm32_rproc_resume(struct device *dev)
+static int stm32_rproc_resume(struct device *dev)
 {
        struct rproc *rproc = dev_get_drvdata(dev);
        struct stm32_rproc *ddata = rproc->priv;
@@ -943,16 +943,16 @@ static int __maybe_unused stm32_rproc_resume(struct device *dev)
        return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(stm32_rproc_pm_ops,
-                        stm32_rproc_suspend, stm32_rproc_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(stm32_rproc_pm_ops,
+                               stm32_rproc_suspend, stm32_rproc_resume);
 
 static struct platform_driver stm32_rproc_driver = {
        .probe = stm32_rproc_probe,
        .remove_new = stm32_rproc_remove,
        .driver = {
                .name = "stm32-rproc",
-               .pm = &stm32_rproc_pm_ops,
-               .of_match_table = of_match_ptr(stm32_rproc_match),
+               .pm = pm_ptr(&stm32_rproc_pm_ops),
+               .of_match_table = stm32_rproc_match,
        },
 };
 module_platform_driver(stm32_rproc_driver);
index ec626a3..ef8415a 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/io.h>
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_reserved_mem.h>
 #include <linux/omap-mailbox.h>
 #include <linux/platform_device.h>
index 23fe44d..ad3415a 100644 (file)
 #include <linux/kernel.h>
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_reserved_mem.h>
+#include <linux/of_platform.h>
 #include <linux/omap-mailbox.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
index 120dc7d..36a55f7 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
index 1beb40a..82d460f 100644 (file)
@@ -200,9 +200,15 @@ static const struct rpmsg_endpoint_ops glink_endpoint_ops;
 #define GLINK_CMD_TX_DATA_CONT         12
 #define GLINK_CMD_READ_NOTIF           13
 #define GLINK_CMD_RX_DONE_W_REUSE      14
+#define GLINK_CMD_SIGNALS              15
 
 #define GLINK_FEATURE_INTENTLESS       BIT(1)
 
+#define NATIVE_DTR_SIG                 NATIVE_DSR_SIG
+#define NATIVE_DSR_SIG                 BIT(31)
+#define NATIVE_RTS_SIG                 NATIVE_CTS_SIG
+#define NATIVE_CTS_SIG                 BIT(30)
+
 static void qcom_glink_rx_done_work(struct work_struct *work);
 
 static struct glink_channel *qcom_glink_alloc_channel(struct qcom_glink *glink,
@@ -221,6 +227,10 @@ static struct glink_channel *qcom_glink_alloc_channel(struct qcom_glink *glink,
 
        channel->glink = glink;
        channel->name = kstrdup(name, GFP_KERNEL);
+       if (!channel->name) {
+               kfree(channel);
+               return ERR_PTR(-ENOMEM);
+       }
 
        init_completion(&channel->open_req);
        init_completion(&channel->open_ack);
@@ -1025,6 +1035,52 @@ static int qcom_glink_rx_open_ack(struct qcom_glink *glink, unsigned int lcid)
        return 0;
 }
 
+/**
+ * qcom_glink_set_flow_control() - convert a signal cmd to wire format and transmit
+ * @ept:       Rpmsg endpoint for channel.
+ * @pause:     Pause transmission
+ * @dst:       destination address of the endpoint
+ *
+ * Return: 0 on success or standard Linux error code.
+ */
+static int qcom_glink_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst)
+{
+       struct glink_channel *channel = to_glink_channel(ept);
+       struct qcom_glink *glink = channel->glink;
+       struct glink_msg msg;
+       u32 sigs = 0;
+
+       if (pause)
+               sigs |= NATIVE_DTR_SIG | NATIVE_RTS_SIG;
+
+       msg.cmd = cpu_to_le16(GLINK_CMD_SIGNALS);
+       msg.param1 = cpu_to_le16(channel->lcid);
+       msg.param2 = cpu_to_le32(sigs);
+
+       return qcom_glink_tx(glink, &msg, sizeof(msg), NULL, 0, true);
+}
+
+static void qcom_glink_handle_signals(struct qcom_glink *glink,
+                                     unsigned int rcid, unsigned int sigs)
+{
+       struct glink_channel *channel;
+       unsigned long flags;
+       bool enable;
+
+       spin_lock_irqsave(&glink->idr_lock, flags);
+       channel = idr_find(&glink->rcids, rcid);
+       spin_unlock_irqrestore(&glink->idr_lock, flags);
+       if (!channel) {
+               dev_err(glink->dev, "signal for non-existing channel\n");
+               return;
+       }
+
+       enable = sigs & NATIVE_DSR_SIG || sigs & NATIVE_CTS_SIG;
+
+       if (channel->ept.flow_cb)
+               channel->ept.flow_cb(channel->ept.rpdev, channel->ept.priv, enable);
+}
+
 void qcom_glink_native_rx(struct qcom_glink *glink)
 {
        struct glink_msg msg;
@@ -1086,6 +1142,10 @@ void qcom_glink_native_rx(struct qcom_glink *glink)
                        qcom_glink_handle_intent_req_ack(glink, param1, param2);
                        qcom_glink_rx_advance(glink, ALIGN(sizeof(msg), 8));
                        break;
+               case GLINK_CMD_SIGNALS:
+                       qcom_glink_handle_signals(glink, param1, param2);
+                       qcom_glink_rx_advance(glink, ALIGN(sizeof(msg), 8));
+                       break;
                default:
                        dev_err(glink->dev, "unhandled rx cmd: %d\n", cmd);
                        ret = -EINVAL;
@@ -1446,6 +1506,7 @@ static const struct rpmsg_endpoint_ops glink_endpoint_ops = {
        .sendto = qcom_glink_sendto,
        .trysend = qcom_glink_trysend,
        .trysendto = qcom_glink_trysendto,
+       .set_flow_control = qcom_glink_set_flow_control,
 };
 
 static void qcom_glink_rpdev_release(struct device *dev)
index a271fce..09833ad 100644 (file)
@@ -52,6 +52,8 @@ static DEFINE_IDA(rpmsg_minor_ida);
  * @readq:     wait object for incoming queue
  * @default_ept: set to channel default endpoint if the default endpoint should be re-used
  *              on device open to prevent endpoint address update.
+ * @remote_flow_restricted: indicates whether the remote has requested that flow be limited
+ * @remote_flow_updated: indicates that a flow control update has arrived and not yet been read
  */
 struct rpmsg_eptdev {
        struct device dev;
@@ -68,6 +70,8 @@ struct rpmsg_eptdev {
        struct sk_buff_head queue;
        wait_queue_head_t readq;
 
+       bool remote_flow_restricted;
+       bool remote_flow_updated;
 };
 
 int rpmsg_chrdev_eptdev_destroy(struct device *dev, void *data)
@@ -116,6 +120,18 @@ static int rpmsg_ept_cb(struct rpmsg_device *rpdev, void *buf, int len,
        return 0;
 }
 
+static int rpmsg_ept_flow_cb(struct rpmsg_device *rpdev, void *priv, bool enable)
+{
+       struct rpmsg_eptdev *eptdev = priv;
+
+       eptdev->remote_flow_restricted = enable;
+       eptdev->remote_flow_updated = true;
+
+       wake_up_interruptible(&eptdev->readq);
+
+       return 0;
+}
+
 static int rpmsg_eptdev_open(struct inode *inode, struct file *filp)
 {
        struct rpmsg_eptdev *eptdev = cdev_to_eptdev(inode->i_cdev);
@@ -152,6 +168,7 @@ static int rpmsg_eptdev_open(struct inode *inode, struct file *filp)
                return -EINVAL;
        }
 
+       ept->flow_cb = rpmsg_ept_flow_cb;
        eptdev->ept = ept;
        filp->private_data = eptdev;
        mutex_unlock(&eptdev->ept_lock);
@@ -172,6 +189,7 @@ static int rpmsg_eptdev_release(struct inode *inode, struct file *filp)
                eptdev->ept = NULL;
        }
        mutex_unlock(&eptdev->ept_lock);
+       eptdev->remote_flow_updated = false;
 
        /* Discard all SKBs */
        skb_queue_purge(&eptdev->queue);
@@ -285,6 +303,9 @@ static __poll_t rpmsg_eptdev_poll(struct file *filp, poll_table *wait)
        if (!skb_queue_empty(&eptdev->queue))
                mask |= EPOLLIN | EPOLLRDNORM;
 
+       if (eptdev->remote_flow_updated)
+               mask |= EPOLLPRI;
+
        mutex_lock(&eptdev->ept_lock);
        mask |= rpmsg_poll(eptdev->ept, filp, wait);
        mutex_unlock(&eptdev->ept_lock);
@@ -297,14 +318,35 @@ static long rpmsg_eptdev_ioctl(struct file *fp, unsigned int cmd,
 {
        struct rpmsg_eptdev *eptdev = fp->private_data;
 
-       if (cmd != RPMSG_DESTROY_EPT_IOCTL)
-               return -EINVAL;
+       bool set;
+       int ret;
 
-       /* Don't allow to destroy a default endpoint. */
-       if (eptdev->default_ept)
-               return -EINVAL;
+       switch (cmd) {
+       case RPMSG_GET_OUTGOING_FLOWCONTROL:
+               eptdev->remote_flow_updated = false;
+               ret = put_user(eptdev->remote_flow_restricted, (int __user *)arg);
+               break;
+       case RPMSG_SET_INCOMING_FLOWCONTROL:
+               if (arg > 1) {
+                       ret = -EINVAL;
+                       break;
+               }
+               set = !!arg;
+               ret = rpmsg_set_flow_control(eptdev->ept, set, eptdev->chinfo.dst);
+               break;
+       case RPMSG_DESTROY_EPT_IOCTL:
+               /* Don't allow to destroy a default endpoint. */
+               if (eptdev->default_ept) {
+                       ret = -EINVAL;
+                       break;
+               }
+               ret = rpmsg_chrdev_eptdev_destroy(&eptdev->dev, NULL);
+               break;
+       default:
+               ret = -EINVAL;
+       }
 
-       return rpmsg_chrdev_eptdev_destroy(&eptdev->dev, NULL);
+       return ret;
 }
 
 static const struct file_operations rpmsg_eptdev_fops = {
index 5039df7..32b550c 100644 (file)
@@ -331,6 +331,25 @@ int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
 EXPORT_SYMBOL(rpmsg_trysend_offchannel);
 
 /**
+ * rpmsg_set_flow_control() - request remote to pause/resume transmission
+ * @ept:       the rpmsg endpoint
+ * @pause:     pause transmission
+ * @dst:       destination address of the endpoint
+ *
+ * Return: 0 on success and an appropriate error value on failure.
+ */
+int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst)
+{
+       if (WARN_ON(!ept))
+               return -EINVAL;
+       if (!ept->ops->set_flow_control)
+               return -EOPNOTSUPP;
+
+       return ept->ops->set_flow_control(ept, pause, dst);
+}
+EXPORT_SYMBOL_GPL(rpmsg_set_flow_control);
+
+/**
  * rpmsg_get_mtu() - get maximum transmission buffer size for sending message.
  * @ept: the rpmsg endpoint
  *
@@ -539,6 +558,8 @@ static int rpmsg_dev_probe(struct device *dev)
 
                rpdev->ept = ept;
                rpdev->src = ept->addr;
+
+               ept->flow_cb = rpdrv->flowcontrol;
        }
 
        err = rpdrv->probe(rpdev);
index 39b646d..b950d6f 100644 (file)
@@ -55,6 +55,7 @@ struct rpmsg_device_ops {
  * @trysendto:         see @rpmsg_trysendto(), optional
  * @trysend_offchannel:        see @rpmsg_trysend_offchannel(), optional
  * @poll:              see @rpmsg_poll(), optional
+ * @set_flow_control:  see @rpmsg_set_flow_control(), optional
  * @get_mtu:           see @rpmsg_get_mtu(), optional
  *
  * Indirection table for the operations that a rpmsg backend should implement.
@@ -75,6 +76,7 @@ struct rpmsg_endpoint_ops {
                             void *data, int len);
        __poll_t (*poll)(struct rpmsg_endpoint *ept, struct file *filp,
                             poll_table *wait);
+       int (*set_flow_control)(struct rpmsg_endpoint *ept, bool pause, u32 dst);
        ssize_t (*get_mtu)(struct rpmsg_endpoint *ept);
 };
 
index 05f4b2d..d750243 100644 (file)
@@ -904,9 +904,9 @@ config RTC_DRV_PCF2127
        select REGMAP_SPI if SPI_MASTER
        select WATCHDOG_CORE if WATCHDOG
        help
-         If you say yes here you get support for the NXP PCF2127/29 RTC
+         If you say yes here you get support for the NXP PCF2127/29/31 RTC
          chips with integrated quartz crystal for industrial applications.
-         Both chips also have watchdog timer and tamper switch detection
+         These chips also have watchdog timer and tamper switch detection
          features.
 
          PCF2127 has an additional feature of 512 bytes battery backed
@@ -1196,6 +1196,7 @@ config RTC_DRV_MSM6242
 config RTC_DRV_BQ4802
        tristate "TI BQ4802"
        depends on HAS_IOMEM && HAS_IOPORT
+       depends on SPARC || COMPILE_TEST
        help
          If you say Y here you will get support for the TI
          BQ4802 RTC chip.
index 499d891..1b63111 100644 (file)
@@ -376,7 +376,7 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
        err = rtc_valid_tm(&alarm->time);
 
 done:
-       if (err)
+       if (err && alarm->enabled)
                dev_warn(&rtc->dev, "invalid alarm value: %ptR\n",
                         &alarm->time);
 
index e08d318..fde2b80 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/i2c.h>
 #include <linux/kstrtox.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/rtc.h>
 #include <linux/watchdog.h>
 
index b4139c2..569c105 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
@@ -474,7 +473,6 @@ static const struct armada38x_rtc_data armada8k_data = {
        .alarm = ALARM2,
 };
 
-#ifdef CONFIG_OF
 static const struct of_device_id armada38x_rtc_of_match_table[] = {
        {
                .compatible = "marvell,armada-380-rtc",
@@ -487,7 +485,6 @@ static const struct of_device_id armada38x_rtc_of_match_table[] = {
        {}
 };
 MODULE_DEVICE_TABLE(of, armada38x_rtc_of_match_table);
-#endif
 
 static __init int armada38x_rtc_probe(struct platform_device *pdev)
 {
@@ -577,7 +574,7 @@ static struct platform_driver armada38x_rtc_driver = {
        .driver         = {
                .name   = "armada38x-rtc",
                .pm     = &armada38x_rtc_pm_ops,
-               .of_match_table = of_match_ptr(armada38x_rtc_of_match_table),
+               .of_match_table = armada38x_rtc_of_match_table,
        },
 };
 
index a93352e..880b015 100644 (file)
@@ -118,7 +118,7 @@ MODULE_DEVICE_TABLE(of, aspeed_rtc_match);
 static struct platform_driver aspeed_rtc_driver = {
        .driver = {
                .name = "aspeed-rtc",
-               .of_match_table = of_match_ptr(aspeed_rtc_match),
+               .of_match_table = aspeed_rtc_match,
        },
 };
 
index e9d1723..add4f71 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
@@ -642,7 +641,7 @@ static struct platform_driver at91_rtc_driver = {
        .driver         = {
                .name   = "at91_rtc",
                .pm     = &at91_rtc_pm_ops,
-               .of_match_table = of_match_ptr(at91_rtc_dt_ids),
+               .of_match_table = at91_rtc_dt_ids,
        },
 };
 
index 610f27d..f93bee9 100644 (file)
@@ -534,7 +534,7 @@ static struct platform_driver at91_rtc_driver = {
        .driver         = {
                .name   = "rtc-at91sam9",
                .pm     = &at91_rtc_pm_ops,
-               .of_match_table = of_match_ptr(at91_rtc_dt_ids),
+               .of_match_table = at91_rtc_dt_ids,
        },
 };
 
index c9416fe..228fb2d 100644 (file)
@@ -913,6 +913,10 @@ static inline void cmos_check_acpi_rtc_status(struct device *dev,
 #define        INITSECTION     __init
 #endif
 
+#define SECS_PER_DAY   (24 * 60 * 60)
+#define SECS_PER_MONTH (28 * SECS_PER_DAY)
+#define SECS_PER_YEAR  (365 * SECS_PER_DAY)
+
 static int INITSECTION
 cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 {
@@ -1019,6 +1023,13 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                goto cleanup0;
        }
 
+       if (cmos_rtc.mon_alrm)
+               cmos_rtc.rtc->alarm_offset_max = SECS_PER_YEAR - 1;
+       else if (cmos_rtc.day_alrm)
+               cmos_rtc.rtc->alarm_offset_max = SECS_PER_MONTH - 1;
+       else
+               cmos_rtc.rtc->alarm_offset_max = SECS_PER_DAY - 1;
+
        rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
 
        if (!mc146818_does_rtc_work()) {
index 998ab86..0cd397c 100644 (file)
@@ -182,21 +182,15 @@ static int cros_ec_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
        ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset);
        if (ret < 0) {
-               if (ret == -EINVAL && alarm_offset >= SECS_PER_DAY) {
-                       /*
-                        * RTC chips on some older Chromebooks can only handle
-                        * alarms up to 24h in the future. Try to set an alarm
-                        * below that limit to avoid suspend failures.
-                        */
-                       ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM,
-                                             SECS_PER_DAY - 1);
-               }
-
-               if (ret < 0) {
-                       dev_err(dev, "error setting alarm in %u seconds: %d\n",
-                               alarm_offset, ret);
-                       return ret;
-               }
+               dev_err(dev, "error setting alarm in %u seconds: %d\n",
+                       alarm_offset, ret);
+               /*
+                * The EC code returns -EINVAL if the alarm time is too
+                * far in the future. Convert it to the expected error code.
+                */
+               if (ret == -EINVAL)
+                       ret = -ERANGE;
+               return ret;
        }
 
        return 0;
@@ -355,6 +349,20 @@ static int cros_ec_rtc_probe(struct platform_device *pdev)
        cros_ec_rtc->rtc->ops = &cros_ec_rtc_ops;
        cros_ec_rtc->rtc->range_max = U32_MAX;
 
+       /*
+        * The RTC on some older Chromebooks can only handle alarms less than
+        * 24 hours in the future. The only way to find out is to try to set an
+        * alarm further in the future. If that fails, assume that the RTC
+        * connected to the EC can only handle less than 24 hours of alarm
+        * window.
+        */
+       ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, SECS_PER_DAY * 2);
+       if (ret == -EINVAL)
+               cros_ec_rtc->rtc->alarm_offset_max = SECS_PER_DAY - 1;
+
+       (void)cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM,
+                             EC_RTC_ALARM_CLEAR);
+
        ret = devm_rtc_register_device(cros_ec_rtc->rtc);
        if (ret)
                return ret;
index ee2efb4..2f5d606 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -496,6 +497,12 @@ static int da9063_rtc_probe(struct platform_device *pdev)
                dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
                        irq_alarm, ret);
 
+       ret = dev_pm_set_wake_irq(&pdev->dev, irq_alarm);
+       if (ret)
+               dev_warn(&pdev->dev,
+                        "Failed to set IRQ %d as a wake IRQ: %d\n",
+                        irq_alarm, ret);
+
        device_init_wakeup(&pdev->dev, true);
 
        return devm_rtc_register_device(rtc->rtc_dev);
index ed93604..d4de401 100644 (file)
@@ -336,8 +336,8 @@ static int ds1305_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
        /* make sure alarm fires within the next 24 hours */
        if (later <= now)
                return -EINVAL;
-       if ((later - now) > 24 * 60 * 60)
-               return -EDOM;
+       if ((later - now) > ds1305->rtc->alarm_offset_max)
+               return -ERANGE;
 
        /* disable alarm if needed */
        if (ds1305->ctrl[0] & DS1305_AEI0) {
@@ -691,6 +691,7 @@ static int ds1305_probe(struct spi_device *spi)
        ds1305->rtc->ops = &ds1305_ops;
        ds1305->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        ds1305->rtc->range_max = RTC_TIMESTAMP_END_2099;
+       ds1305->rtc->alarm_offset_max = 24 * 60 * 60;
 
        ds1305_nvmem_cfg.priv = ds1305;
        status = devm_rtc_register_device(ds1305->rtc);
index cb5acec..506b7d1 100644 (file)
@@ -1744,7 +1744,7 @@ static int ds1307_probe(struct i2c_client *client)
 
        match = device_get_match_data(&client->dev);
        if (match) {
-               ds1307->type = (enum ds_type)match;
+               ds1307->type = (uintptr_t)match;
                chip = &chips[ds1307->type];
        } else if (id) {
                chip = &chips[id->driver_data];
index a5026b0..6ae8b9a 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/jiffies.h>
 #include <linux/rtc.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/module.h>
index 0480f59..3231fd9 100644 (file)
@@ -7,9 +7,8 @@
 #include <linux/rtc.h>
 #include <linux/types.h>
 #include <linux/bcd.h>
-#include <linux/platform_data/rtc-ds2404.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 
 #include <linux/io.h>
 #define DS2404_CLK     1
 #define DS2404_DQ      2
 
-struct ds2404_gpio {
-       const char *name;
-       unsigned int gpio;
-};
-
 struct ds2404 {
-       struct ds2404_gpio *gpio;
+       struct device *dev;
+       struct gpio_desc *rst_gpiod;
+       struct gpio_desc *clk_gpiod;
+       struct gpio_desc *dq_gpiod;
        struct rtc_device *rtc;
 };
 
-static struct ds2404_gpio ds2404_gpio[] = {
-       { "RTC RST", 0 },
-       { "RTC CLK", 0 },
-       { "RTC DQ", 0 },
-};
-
-static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev,
-                         struct ds2404_platform_data *pdata)
+static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev)
 {
-       int i, err;
-
-       ds2404_gpio[DS2404_RST].gpio = pdata->gpio_rst;
-       ds2404_gpio[DS2404_CLK].gpio = pdata->gpio_clk;
-       ds2404_gpio[DS2404_DQ].gpio = pdata->gpio_dq;
-
-       for (i = 0; i < ARRAY_SIZE(ds2404_gpio); i++) {
-               err = gpio_request(ds2404_gpio[i].gpio, ds2404_gpio[i].name);
-               if (err) {
-                       dev_err(&pdev->dev, "error mapping gpio %s: %d\n",
-                               ds2404_gpio[i].name, err);
-                       goto err_request;
-               }
-               if (i != DS2404_DQ)
-                       gpio_direction_output(ds2404_gpio[i].gpio, 1);
-       }
+       struct device *dev = &pdev->dev;
 
-       chip->gpio = ds2404_gpio;
-       return 0;
+       /* This will de-assert RESET, declare this GPIO as GPIOD_ACTIVE_LOW */
+       chip->rst_gpiod = devm_gpiod_get(dev, "rst", GPIOD_OUT_LOW);
+       if (IS_ERR(chip->rst_gpiod))
+               return PTR_ERR(chip->rst_gpiod);
 
-err_request:
-       while (--i >= 0)
-               gpio_free(ds2404_gpio[i].gpio);
-       return err;
-}
+       chip->clk_gpiod = devm_gpiod_get(dev, "clk", GPIOD_OUT_HIGH);
+       if (IS_ERR(chip->clk_gpiod))
+               return PTR_ERR(chip->clk_gpiod);
 
-static void ds2404_gpio_unmap(void *data)
-{
-       int i;
+       chip->dq_gpiod = devm_gpiod_get(dev, "dq", GPIOD_ASIS);
+       if (IS_ERR(chip->dq_gpiod))
+               return PTR_ERR(chip->dq_gpiod);
 
-       for (i = 0; i < ARRAY_SIZE(ds2404_gpio); i++)
-               gpio_free(ds2404_gpio[i].gpio);
+       return 0;
 }
 
-static void ds2404_reset(struct device *dev)
+static void ds2404_reset(struct ds2404 *chip)
 {
-       gpio_set_value(ds2404_gpio[DS2404_RST].gpio, 0);
+       gpiod_set_value(chip->rst_gpiod, 1);
        udelay(1000);
-       gpio_set_value(ds2404_gpio[DS2404_RST].gpio, 1);
-       gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
-       gpio_direction_output(ds2404_gpio[DS2404_DQ].gpio, 0);
+       gpiod_set_value(chip->rst_gpiod, 0);
+       gpiod_set_value(chip->clk_gpiod, 0);
+       gpiod_direction_output(chip->dq_gpiod, 0);
        udelay(10);
 }
 
-static void ds2404_write_byte(struct device *dev, u8 byte)
+static void ds2404_write_byte(struct ds2404 *chip, u8 byte)
 {
        int i;
 
-       gpio_direction_output(ds2404_gpio[DS2404_DQ].gpio, 1);
+       gpiod_direction_output(chip->dq_gpiod, 1);
        for (i = 0; i < 8; i++) {
-               gpio_set_value(ds2404_gpio[DS2404_DQ].gpio, byte & (1 << i));
+               gpiod_set_value(chip->dq_gpiod, byte & (1 << i));
                udelay(10);
-               gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 1);
+               gpiod_set_value(chip->clk_gpiod, 1);
                udelay(10);
-               gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
+               gpiod_set_value(chip->clk_gpiod, 0);
                udelay(10);
        }
 }
 
-static u8 ds2404_read_byte(struct device *dev)
+static u8 ds2404_read_byte(struct ds2404 *chip)
 {
        int i;
        u8 ret = 0;
 
-       gpio_direction_input(ds2404_gpio[DS2404_DQ].gpio);
+       gpiod_direction_input(chip->dq_gpiod);
 
        for (i = 0; i < 8; i++) {
-               gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
+               gpiod_set_value(chip->clk_gpiod, 0);
                udelay(10);
-               if (gpio_get_value(ds2404_gpio[DS2404_DQ].gpio))
+               if (gpiod_get_value(chip->dq_gpiod))
                        ret |= 1 << i;
-               gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 1);
+               gpiod_set_value(chip->clk_gpiod, 1);
                udelay(10);
        }
        return ret;
 }
 
-static void ds2404_read_memory(struct device *dev, u16 offset,
+static void ds2404_read_memory(struct ds2404 *chip, u16 offset,
                               int length, u8 *out)
 {
-       ds2404_reset(dev);
-       ds2404_write_byte(dev, DS2404_READ_MEMORY_CMD);
-       ds2404_write_byte(dev, offset & 0xff);
-       ds2404_write_byte(dev, (offset >> 8) & 0xff);
+       ds2404_reset(chip);
+       ds2404_write_byte(chip, DS2404_READ_MEMORY_CMD);
+       ds2404_write_byte(chip, offset & 0xff);
+       ds2404_write_byte(chip, (offset >> 8) & 0xff);
        while (length--)
-               *out++ = ds2404_read_byte(dev);
+               *out++ = ds2404_read_byte(chip);
 }
 
-static void ds2404_write_memory(struct device *dev, u16 offset,
+static void ds2404_write_memory(struct ds2404 *chip, u16 offset,
                                int length, u8 *out)
 {
        int i;
        u8 ta01, ta02, es;
 
-       ds2404_reset(dev);
-       ds2404_write_byte(dev, DS2404_WRITE_SCRATCHPAD_CMD);
-       ds2404_write_byte(dev, offset & 0xff);
-       ds2404_write_byte(dev, (offset >> 8) & 0xff);
+       ds2404_reset(chip);
+       ds2404_write_byte(chip, DS2404_WRITE_SCRATCHPAD_CMD);
+       ds2404_write_byte(chip, offset & 0xff);
+       ds2404_write_byte(chip, (offset >> 8) & 0xff);
 
        for (i = 0; i < length; i++)
-               ds2404_write_byte(dev, out[i]);
+               ds2404_write_byte(chip, out[i]);
 
-       ds2404_reset(dev);
-       ds2404_write_byte(dev, DS2404_READ_SCRATCHPAD_CMD);
+       ds2404_reset(chip);
+       ds2404_write_byte(chip, DS2404_READ_SCRATCHPAD_CMD);
 
-       ta01 = ds2404_read_byte(dev);
-       ta02 = ds2404_read_byte(dev);
-       es = ds2404_read_byte(dev);
+       ta01 = ds2404_read_byte(chip);
+       ta02 = ds2404_read_byte(chip);
+       es = ds2404_read_byte(chip);
 
        for (i = 0; i < length; i++) {
-               if (out[i] != ds2404_read_byte(dev)) {
-                       dev_err(dev, "read invalid data\n");
+               if (out[i] != ds2404_read_byte(chip)) {
+                       dev_err(chip->dev, "read invalid data\n");
                        return;
                }
        }
 
-       ds2404_reset(dev);
-       ds2404_write_byte(dev, DS2404_COPY_SCRATCHPAD_CMD);
-       ds2404_write_byte(dev, ta01);
-       ds2404_write_byte(dev, ta02);
-       ds2404_write_byte(dev, es);
+       ds2404_reset(chip);
+       ds2404_write_byte(chip, DS2404_COPY_SCRATCHPAD_CMD);
+       ds2404_write_byte(chip, ta01);
+       ds2404_write_byte(chip, ta02);
+       ds2404_write_byte(chip, es);
 
-       gpio_direction_input(ds2404_gpio[DS2404_DQ].gpio);
-       while (gpio_get_value(ds2404_gpio[DS2404_DQ].gpio))
+       while (gpiod_get_value(chip->dq_gpiod))
                ;
 }
 
-static void ds2404_enable_osc(struct device *dev)
+static void ds2404_enable_osc(struct ds2404 *chip)
 {
        u8 in[1] = { 0x10 }; /* enable oscillator */
-       ds2404_write_memory(dev, 0x201, 1, in);
+
+       ds2404_write_memory(chip, 0x201, 1, in);
 }
 
 static int ds2404_read_time(struct device *dev, struct rtc_time *dt)
 {
+       struct ds2404 *chip = dev_get_drvdata(dev);
        unsigned long time = 0;
        __le32 hw_time = 0;
 
-       ds2404_read_memory(dev, 0x203, 4, (u8 *)&hw_time);
+       ds2404_read_memory(chip, 0x203, 4, (u8 *)&hw_time);
        time = le32_to_cpu(hw_time);
 
        rtc_time64_to_tm(time, dt);
@@ -193,8 +168,9 @@ static int ds2404_read_time(struct device *dev, struct rtc_time *dt)
 
 static int ds2404_set_time(struct device *dev, struct rtc_time *dt)
 {
+       struct ds2404 *chip = dev_get_drvdata(dev);
        u32 time = cpu_to_le32(rtc_tm_to_time64(dt));
-       ds2404_write_memory(dev, 0x203, 4, (u8 *)&time);
+       ds2404_write_memory(chip, 0x203, 4, (u8 *)&time);
        return 0;
 }
 
@@ -205,7 +181,6 @@ static const struct rtc_class_ops ds2404_rtc_ops = {
 
 static int rtc_probe(struct platform_device *pdev)
 {
-       struct ds2404_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct ds2404 *chip;
        int retval = -EBUSY;
 
@@ -213,22 +188,16 @@ static int rtc_probe(struct platform_device *pdev)
        if (!chip)
                return -ENOMEM;
 
+       chip->dev = &pdev->dev;
+
        chip->rtc = devm_rtc_allocate_device(&pdev->dev);
        if (IS_ERR(chip->rtc))
                return PTR_ERR(chip->rtc);
 
-       retval = ds2404_gpio_map(chip, pdev, pdata);
+       retval = ds2404_gpio_map(chip, pdev);
        if (retval)
                return retval;
 
-       retval = devm_add_action_or_reset(&pdev->dev, ds2404_gpio_unmap, chip);
-       if (retval)
-               return retval;
-
-       dev_info(&pdev->dev, "using GPIOs RST:%d, CLK:%d, DQ:%d\n",
-                chip->gpio[DS2404_RST].gpio, chip->gpio[DS2404_CLK].gpio,
-                chip->gpio[DS2404_DQ].gpio);
-
        platform_set_drvdata(pdev, chip);
 
        chip->rtc->ops = &ds2404_rtc_ops;
@@ -238,7 +207,7 @@ static int rtc_probe(struct platform_device *pdev)
        if (retval)
                return retval;
 
-       ds2404_enable_osc(&pdev->dev);
+       ds2404_enable_osc(chip);
        return 0;
 }
 
index 3d7c407..a72c4ad 100644 (file)
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/fsl/ftm.h>
 #include <linux/rtc.h>
index a613257..4eef7af 100644 (file)
@@ -9,6 +9,8 @@
  */
 
 #include <linux/bcd.h>
+#include <linux/bitfield.h>
+#include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/hwmon.h>
 #include <linux/i2c.h>
@@ -31,6 +33,8 @@
 #define ISL12022_REG_SR                0x07
 #define ISL12022_REG_INT       0x08
 
+#define ISL12022_REG_PWR_VBAT  0x0a
+
 #define ISL12022_REG_BETA      0x0d
 #define ISL12022_REG_TEMP_L    0x28
 
 #define ISL12022_SR_LBAT75     (1 << 1)
 
 #define ISL12022_INT_WRTC      (1 << 6)
+#define ISL12022_INT_FO_MASK   GENMASK(3, 0)
+#define ISL12022_INT_FO_OFF    0x0
+#define ISL12022_INT_FO_32K    0x1
+
+#define ISL12022_REG_VB85_MASK GENMASK(5, 3)
+#define ISL12022_REG_VB75_MASK GENMASK(2, 0)
 
 #define ISL12022_BETA_TSE      (1 << 7)
 
@@ -141,12 +151,6 @@ static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
        if (ret)
                return ret;
 
-       if (buf[ISL12022_REG_SR] & (ISL12022_SR_LBAT85 | ISL12022_SR_LBAT75)) {
-               dev_warn(dev,
-                        "voltage dropped below %u%%, date and time is not reliable.\n",
-                        buf[ISL12022_REG_SR] & ISL12022_SR_LBAT85 ? 85 : 75);
-       }
-
        dev_dbg(dev,
                "raw data is sec=%02x, min=%02x, hr=%02x, mday=%02x, mon=%02x, year=%02x, wday=%02x, sr=%02x, int=%02x",
                buf[ISL12022_REG_SC],
@@ -204,7 +208,34 @@ static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
        return regmap_bulk_write(regmap, ISL12022_REG_SC, buf, sizeof(buf));
 }
 
+static int isl12022_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+       struct regmap *regmap = dev_get_drvdata(dev);
+       u32 user, val;
+       int ret;
+
+       switch (cmd) {
+       case RTC_VL_READ:
+               ret = regmap_read(regmap, ISL12022_REG_SR, &val);
+               if (ret)
+                       return ret;
+
+               user = 0;
+               if (val & ISL12022_SR_LBAT85)
+                       user |= RTC_VL_BACKUP_LOW;
+
+               if (val & ISL12022_SR_LBAT75)
+                       user |= RTC_VL_BACKUP_EMPTY;
+
+               return put_user(user, (u32 __user *)arg);
+
+       default:
+               return -ENOIOCTLCMD;
+       }
+}
+
 static const struct rtc_class_ops isl12022_rtc_ops = {
+       .ioctl          = isl12022_rtc_ioctl,
        .read_time      = isl12022_rtc_read_time,
        .set_time       = isl12022_rtc_set_time,
 };
@@ -215,10 +246,88 @@ static const struct regmap_config regmap_config = {
        .use_single_write = true,
 };
 
+static int isl12022_register_clock(struct device *dev)
+{
+       struct regmap *regmap = dev_get_drvdata(dev);
+       struct clk_hw *hw;
+       int ret;
+
+       if (!device_property_present(dev, "#clock-cells")) {
+               /*
+                * Disabling the F_OUT pin reduces the power
+                * consumption in battery mode by ~25%.
+                */
+               regmap_update_bits(regmap, ISL12022_REG_INT, ISL12022_INT_FO_MASK,
+                                  ISL12022_INT_FO_OFF);
+
+               return 0;
+       }
+
+       if (!IS_ENABLED(CONFIG_COMMON_CLK))
+               return 0;
+
+       /*
+        * For now, only support a fixed clock of 32768Hz (the reset default).
+        */
+       ret = regmap_update_bits(regmap, ISL12022_REG_INT,
+                                ISL12022_INT_FO_MASK, ISL12022_INT_FO_32K);
+       if (ret)
+               return ret;
+
+       hw = devm_clk_hw_register_fixed_rate(dev, "isl12022", NULL, 0, 32768);
+       if (IS_ERR(hw))
+               return PTR_ERR(hw);
+
+       return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw);
+}
+
+static const u32 trip_levels[2][7] = {
+       { 2125000, 2295000, 2550000, 2805000, 3060000, 4250000, 4675000 },
+       { 1875000, 2025000, 2250000, 2475000, 2700000, 3750000, 4125000 },
+};
+
+static void isl12022_set_trip_levels(struct device *dev)
+{
+       struct regmap *regmap = dev_get_drvdata(dev);
+       u32 levels[2] = {0, 0};
+       int ret, i, j, x[2];
+       u8 val, mask;
+
+       device_property_read_u32_array(dev, "isil,battery-trip-levels-microvolt",
+                                      levels, 2);
+
+       for (i = 0; i < 2; i++) {
+               for (j = 0; j < ARRAY_SIZE(trip_levels[i]) - 1; j++) {
+                       if (levels[i] <= trip_levels[i][j])
+                               break;
+               }
+               x[i] = j;
+       }
+
+       val = FIELD_PREP(ISL12022_REG_VB85_MASK, x[0]) |
+               FIELD_PREP(ISL12022_REG_VB75_MASK, x[1]);
+       mask = ISL12022_REG_VB85_MASK | ISL12022_REG_VB75_MASK;
+
+       ret = regmap_update_bits(regmap, ISL12022_REG_PWR_VBAT, mask, val);
+       if (ret)
+               dev_warn(dev, "unable to set battery alarm levels: %d\n", ret);
+
+       /*
+        * Force a write of the TSE bit in the BETA register, in order
+        * to trigger an update of the LBAT75 and LBAT85 bits in the
+        * status register. In battery backup mode, those bits have
+        * another meaning, so without this, they may contain stale
+        * values for up to a minute after power-on.
+        */
+       regmap_write_bits(regmap, ISL12022_REG_BETA,
+                         ISL12022_BETA_TSE, ISL12022_BETA_TSE);
+}
+
 static int isl12022_probe(struct i2c_client *client)
 {
        struct rtc_device *rtc;
        struct regmap *regmap;
+       int ret;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                return -ENODEV;
@@ -231,6 +340,11 @@ static int isl12022_probe(struct i2c_client *client)
 
        dev_set_drvdata(&client->dev, regmap);
 
+       ret = isl12022_register_clock(&client->dev);
+       if (ret)
+               return ret;
+
+       isl12022_set_trip_levels(&client->dev);
        isl12022_hwmon_register(&client->dev);
 
        rtc = devm_rtc_allocate_device(&client->dev);
index 5abff5d..2aabb91 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/mutex.h>
 #include <linux/nvmem-provider.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 
@@ -429,7 +428,7 @@ static void isl12026_force_power_modes(struct i2c_client *client)
        }
 }
 
-static int isl12026_probe_new(struct i2c_client *client)
+static int isl12026_probe(struct i2c_client *client)
 {
        struct isl12026 *priv;
        int ret;
@@ -490,7 +489,7 @@ static struct i2c_driver isl12026_driver = {
                .name   = "rtc-isl12026",
                .of_match_table = isl12026_dt_match,
        },
-       .probe          = isl12026_probe_new,
+       .probe          = isl12026_probe,
        .remove         = isl12026_remove,
 };
 
index b0712b4..e50c23e 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/clk.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/rtc.h>
 
@@ -188,7 +188,7 @@ isl1208_i2c_validate_client(struct i2c_client *client)
 static int isl1208_set_xtoscb(struct i2c_client *client, int sr, int xtosb_val)
 {
        /* Do nothing if bit is already set to desired value */
-       if ((sr & ISL1208_REG_SR_XTOSCB) == xtosb_val)
+       if (!!(sr & ISL1208_REG_SR_XTOSCB) == xtosb_val)
                return 0;
 
        if (xtosb_val)
@@ -862,17 +862,9 @@ isl1208_probe(struct i2c_client *client)
        i2c_set_clientdata(client, isl1208);
 
        /* Determine which chip we have */
-       if (client->dev.of_node) {
-               isl1208->config = of_device_get_match_data(&client->dev);
-               if (!isl1208->config)
-                       return -ENODEV;
-       } else {
-               const struct i2c_device_id *id = i2c_match_id(isl1208_id, client);
-
-               if (!id)
-                       return -ENODEV;
-               isl1208->config = (struct isl1208_config *)id->driver_data;
-       }
+       isl1208->config = i2c_get_match_data(client);
+       if (!isl1208->config)
+               return -ENODEV;
 
        rc = isl1208_clk_present(client, "xin");
        if (rc < 0)
@@ -952,7 +944,6 @@ isl1208_probe(struct i2c_client *client)
                rc = isl1208_setup_irq(client, client->irq);
                if (rc)
                        return rc;
-
        } else {
                clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, isl1208->rtc->features);
        }
index 36453b0..bafa7d1 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/property.h>
@@ -349,7 +349,7 @@ static int jz4740_rtc_probe(struct platform_device *pdev)
        if (!rtc)
                return -ENOMEM;
 
-       rtc->type = (enum jz4740_rtc_type)device_get_match_data(dev);
+       rtc->type = (uintptr_t)device_get_match_data(dev);
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
index a4612e5..df17c48 100644 (file)
@@ -9,9 +9,8 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
index 3cc5151..866489a 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
index 481c952..dd4a62e 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
 #include <linux/bcd.h>
@@ -269,9 +270,16 @@ static int m48t86_rtc_probe(struct platform_device *pdev)
        return 0;
 }
 
+static const struct of_device_id m48t86_rtc_of_ids[] = {
+       { .compatible = "st,m48t86" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, m48t86_rtc_of_ids);
+
 static struct platform_driver m48t86_rtc_platform_driver = {
        .driver         = {
                .name   = "rtc-m48t86",
+               .of_match_table = m48t86_rtc_of_ids,
        },
        .probe          = m48t86_rtc_probe,
 };
index 07df43e..28858fc 100644 (file)
 #include <linux/module.h>
 #include <linux/rtc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 
index 1d297af..1617063 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/mfd/mt6397/core.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
index 81857a4..094c649 100644 (file)
@@ -7,9 +7,9 @@
 
 #include <linux/clk.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
index 762cf03..dbb935d 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/pm_wakeirq.h>
 #include <linux/clk.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #define RTC_INPUT_CLK_32768HZ  (0x00 << 5)
 #define RTC_INPUT_CLK_32000HZ  (0x01 << 5)
index a4e3f92..ed4e606 100644 (file)
@@ -538,7 +538,7 @@ MODULE_DEVICE_TABLE(of, nct3018y_of_match);
 static struct i2c_driver nct3018y_driver = {
        .driver         = {
                .name   = "rtc-nct3018y",
-               .of_match_table = of_match_ptr(nct3018y_of_match),
+               .of_match_table = nct3018y_of_match,
        },
        .probe          = nct3018y_probe,
        .id_table       = nct3018y_id,
index 8ae4d78..5b10ab0 100644 (file)
@@ -747,12 +747,12 @@ static int omap_rtc_probe(struct platform_device *pdev)
        }
 
        rtc->irq_timer = platform_get_irq(pdev, 0);
-       if (rtc->irq_timer <= 0)
-               return -ENOENT;
+       if (rtc->irq_timer < 0)
+               return rtc->irq_timer;
 
        rtc->irq_alarm = platform_get_irq(pdev, 1);
-       if (rtc->irq_alarm <= 0)
-               return -ENOENT;
+       if (rtc->irq_alarm < 0)
+               return rtc->irq_alarm;
 
        rtc->clk = devm_clk_get(&pdev->dev, "ext-clk");
        if (!IS_ERR(rtc->clk))
index ee03b04..9c04c4e 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * An I2C and SPI driver for the NXP PCF2127/29 RTC
+ * An I2C and SPI driver for the NXP PCF2127/29/31 RTC
  * Copyright 2013 Til-Technologies
  *
  * Author: Renaud Cerrato <r.cerrato@til-technologies.fr>
@@ -8,9 +8,13 @@
  * Watchdog and tamper functions
  * Author: Bruno Thomsen <bruno.thomsen@gmail.com>
  *
+ * PCF2131 support
+ * Author: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+ *
  * based on the other drivers in this same directory.
  *
- * Datasheet: https://www.nxp.com/docs/en/data-sheet/PCF2127.pdf
+ * Datasheets: https://www.nxp.com/docs/en/data-sheet/PCF2127.pdf
+ *             https://www.nxp.com/docs/en/data-sheet/PCF2131DS.pdf
  */
 
 #include <linux/i2c.h>
@@ -21,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/watchdog.h>
 
@@ -28,6 +33,7 @@
 #define PCF2127_REG_CTRL1              0x00
 #define PCF2127_BIT_CTRL1_POR_OVRD             BIT(3)
 #define PCF2127_BIT_CTRL1_TSF1                 BIT(4)
+#define PCF2127_BIT_CTRL1_STOP                 BIT(5)
 /* Control register 2 */
 #define PCF2127_REG_CTRL2              0x01
 #define PCF2127_BIT_CTRL2_AIE                  BIT(1)
 #define PCF2127_BIT_CTRL3_BF                   BIT(3)
 #define PCF2127_BIT_CTRL3_BTSE                 BIT(4)
 /* Time and date registers */
-#define PCF2127_REG_SC                 0x03
+#define PCF2127_REG_TIME_BASE          0x03
 #define PCF2127_BIT_SC_OSF                     BIT(7)
-#define PCF2127_REG_MN                 0x04
-#define PCF2127_REG_HR                 0x05
-#define PCF2127_REG_DM                 0x06
-#define PCF2127_REG_DW                 0x07
-#define PCF2127_REG_MO                 0x08
-#define PCF2127_REG_YR                 0x09
 /* Alarm registers */
-#define PCF2127_REG_ALARM_SC           0x0A
-#define PCF2127_REG_ALARM_MN           0x0B
-#define PCF2127_REG_ALARM_HR           0x0C
-#define PCF2127_REG_ALARM_DM           0x0D
-#define PCF2127_REG_ALARM_DW           0x0E
+#define PCF2127_REG_ALARM_BASE         0x0A
 #define PCF2127_BIT_ALARM_AE                   BIT(7)
 /* CLKOUT control register */
 #define PCF2127_REG_CLKOUT             0x0f
 #define PCF2127_BIT_WD_CTL_CD0                 BIT(6)
 #define PCF2127_BIT_WD_CTL_CD1                 BIT(7)
 #define PCF2127_REG_WD_VAL             0x11
-/* Tamper timestamp registers */
-#define PCF2127_REG_TS_CTRL            0x12
+/* Tamper timestamp1 registers */
+#define PCF2127_REG_TS1_BASE           0x12
 #define PCF2127_BIT_TS_CTRL_TSOFF              BIT(6)
 #define PCF2127_BIT_TS_CTRL_TSM                        BIT(7)
-#define PCF2127_REG_TS_SC              0x13
-#define PCF2127_REG_TS_MN              0x14
-#define PCF2127_REG_TS_HR              0x15
-#define PCF2127_REG_TS_DM              0x16
-#define PCF2127_REG_TS_MO              0x17
-#define PCF2127_REG_TS_YR              0x18
 /*
  * RAM registers
  * PCF2127 has 512 bytes general-purpose static RAM (SRAM) that is
  * battery backed and can survive a power outage.
- * PCF2129 doesn't have this feature.
+ * PCF2129/31 doesn't have this feature.
  */
 #define PCF2127_REG_RAM_ADDR_MSB       0x1A
 #define PCF2127_REG_RAM_WRT_CMD                0x1C
 
 /* Watchdog timer value constants */
 #define PCF2127_WD_VAL_STOP            0
-#define PCF2127_WD_VAL_MIN             2
-#define PCF2127_WD_VAL_MAX             255
-#define PCF2127_WD_VAL_DEFAULT         60
+/* PCF2127/29 watchdog timer value constants */
+#define PCF2127_WD_CLOCK_HZ_X1000      1000 /* 1Hz */
+#define PCF2127_WD_MIN_HW_HEARTBEAT_MS 500
+/* PCF2131 watchdog timer value constants */
+#define PCF2131_WD_CLOCK_HZ_X1000      250  /* 1/4Hz */
+#define PCF2131_WD_MIN_HW_HEARTBEAT_MS 4000
+
+#define PCF2127_WD_DEFAULT_TIMEOUT_S   60
 
 /* Mask for currently enabled interrupts */
 #define PCF2127_CTRL1_IRQ_MASK (PCF2127_BIT_CTRL1_TSF1)
                PCF2127_BIT_CTRL2_WDTF | \
                PCF2127_BIT_CTRL2_TSF2)
 
+#define PCF2127_MAX_TS_SUPPORTED       4
+
+/* Control register 4 */
+#define PCF2131_REG_CTRL4              0x03
+#define PCF2131_BIT_CTRL4_TSF4                 BIT(4)
+#define PCF2131_BIT_CTRL4_TSF3                 BIT(5)
+#define PCF2131_BIT_CTRL4_TSF2                 BIT(6)
+#define PCF2131_BIT_CTRL4_TSF1                 BIT(7)
+/* Control register 5 */
+#define PCF2131_REG_CTRL5              0x04
+#define PCF2131_BIT_CTRL5_TSIE4                        BIT(4)
+#define PCF2131_BIT_CTRL5_TSIE3                        BIT(5)
+#define PCF2131_BIT_CTRL5_TSIE2                        BIT(6)
+#define PCF2131_BIT_CTRL5_TSIE1                        BIT(7)
+/* Software reset register */
+#define PCF2131_REG_SR_RESET           0x05
+#define PCF2131_SR_RESET_READ_PATTERN  (BIT(2) | BIT(5))
+#define PCF2131_SR_RESET_CPR_CMD       (PCF2131_SR_RESET_READ_PATTERN | BIT(7))
+/* Time and date registers */
+#define PCF2131_REG_TIME_BASE          0x07
+/* Alarm registers */
+#define PCF2131_REG_ALARM_BASE         0x0E
+/* CLKOUT control register */
+#define PCF2131_REG_CLKOUT             0x13
+/* Watchdog registers */
+#define PCF2131_REG_WD_CTL             0x35
+#define PCF2131_REG_WD_VAL             0x36
+/* Tamper timestamp1 registers */
+#define PCF2131_REG_TS1_BASE           0x14
+/* Tamper timestamp2 registers */
+#define PCF2131_REG_TS2_BASE           0x1B
+/* Tamper timestamp3 registers */
+#define PCF2131_REG_TS3_BASE           0x22
+/* Tamper timestamp4 registers */
+#define PCF2131_REG_TS4_BASE           0x29
+/* Interrupt mask registers */
+#define PCF2131_REG_INT_A_MASK1                0x31
+#define PCF2131_REG_INT_A_MASK2                0x32
+#define PCF2131_REG_INT_B_MASK1                0x33
+#define PCF2131_REG_INT_B_MASK2                0x34
+#define PCF2131_BIT_INT_BLIE           BIT(0)
+#define PCF2131_BIT_INT_BIE            BIT(1)
+#define PCF2131_BIT_INT_AIE            BIT(2)
+#define PCF2131_BIT_INT_WD_CD          BIT(3)
+#define PCF2131_BIT_INT_SI             BIT(4)
+#define PCF2131_BIT_INT_MI             BIT(5)
+#define PCF2131_CTRL2_IRQ_MASK ( \
+               PCF2127_BIT_CTRL2_AF | \
+               PCF2127_BIT_CTRL2_WDTF)
+#define PCF2131_CTRL4_IRQ_MASK ( \
+               PCF2131_BIT_CTRL4_TSF4 | \
+               PCF2131_BIT_CTRL4_TSF3 | \
+               PCF2131_BIT_CTRL4_TSF2 | \
+               PCF2131_BIT_CTRL4_TSF1)
+
+enum pcf21xx_type {
+       PCF2127,
+       PCF2129,
+       PCF2131,
+       PCF21XX_LAST_ID
+};
+
+struct pcf21xx_ts_config {
+       u8 reg_base; /* Base register to read timestamp values. */
+
+       /*
+        * If the TS input pin is driven to GND, an interrupt can be generated
+        * (supported by all variants).
+        */
+       u8 gnd_detect_reg; /* Interrupt control register address. */
+       u8 gnd_detect_bit; /* Interrupt bit. */
+
+       /*
+        * If the TS input pin is driven to an intermediate level between GND
+        * and supply, an interrupt can be generated (optional feature depending
+        * on variant).
+        */
+       u8 inter_detect_reg; /* Interrupt control register address. */
+       u8 inter_detect_bit; /* Interrupt bit. */
+
+       u8 ie_reg; /* Interrupt enable control register. */
+       u8 ie_bit; /* Interrupt enable bit. */
+};
+
+struct pcf21xx_config {
+       int type; /* IC variant */
+       int max_register;
+       unsigned int has_nvmem:1;
+       unsigned int has_bit_wd_ctl_cd0:1;
+       unsigned int wd_val_reg_readable:1; /* If watchdog value register can be read. */
+       unsigned int has_int_a_b:1; /* PCF2131 supports two interrupt outputs. */
+       u8 reg_time_base; /* Time/date base register. */
+       u8 regs_alarm_base; /* Alarm function base registers. */
+       u8 reg_wd_ctl; /* Watchdog control register. */
+       u8 reg_wd_val; /* Watchdog value register. */
+       u8 reg_clkout; /* Clkout register. */
+       int wdd_clock_hz_x1000; /* Watchdog clock in Hz multiplicated by 1000 */
+       int wdd_min_hw_heartbeat_ms;
+       unsigned int ts_count;
+       struct pcf21xx_ts_config ts[PCF2127_MAX_TS_SUPPORTED];
+       struct attribute_group attribute_group;
+};
+
 struct pcf2127 {
        struct rtc_device *rtc;
        struct watchdog_device wdd;
        struct regmap *regmap;
-       time64_t ts;
-       bool ts_valid;
+       const struct pcf21xx_config *cfg;
        bool irq_enabled;
+       time64_t ts[PCF2127_MAX_TS_SUPPORTED]; /* Timestamp values. */
+       bool ts_valid[PCF2127_MAX_TS_SUPPORTED];  /* Timestamp valid indication. */
 };
 
 /*
@@ -117,27 +216,22 @@ struct pcf2127 {
 static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
-       unsigned char buf[10];
+       unsigned char buf[7];
        int ret;
 
        /*
         * Avoid reading CTRL2 register as it causes WD_VAL register
         * value to reset to 0 which means watchdog is stopped.
         */
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL3,
-                              (buf + PCF2127_REG_CTRL3),
-                              ARRAY_SIZE(buf) - PCF2127_REG_CTRL3);
+       ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->reg_time_base,
+                              buf, sizeof(buf));
        if (ret) {
                dev_err(dev, "%s: read error\n", __func__);
                return ret;
        }
 
-       if (buf[PCF2127_REG_CTRL3] & PCF2127_BIT_CTRL3_BLF)
-               dev_info(dev,
-                       "low voltage detected, check/replace RTC battery.\n");
-
        /* Clock integrity is not guaranteed when OSF flag is set. */
-       if (buf[PCF2127_REG_SC] & PCF2127_BIT_SC_OSF) {
+       if (buf[0] & PCF2127_BIT_SC_OSF) {
                /*
                 * no need clear the flag here,
                 * it will be cleared once the new date is saved
@@ -148,20 +242,17 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
        }
 
        dev_dbg(dev,
-               "%s: raw data is cr3=%02x, sec=%02x, min=%02x, hr=%02x, "
+               "%s: raw data is sec=%02x, min=%02x, hr=%02x, "
                "mday=%02x, wday=%02x, mon=%02x, year=%02x\n",
-               __func__, buf[PCF2127_REG_CTRL3], buf[PCF2127_REG_SC],
-               buf[PCF2127_REG_MN], buf[PCF2127_REG_HR],
-               buf[PCF2127_REG_DM], buf[PCF2127_REG_DW],
-               buf[PCF2127_REG_MO], buf[PCF2127_REG_YR]);
-
-       tm->tm_sec = bcd2bin(buf[PCF2127_REG_SC] & 0x7F);
-       tm->tm_min = bcd2bin(buf[PCF2127_REG_MN] & 0x7F);
-       tm->tm_hour = bcd2bin(buf[PCF2127_REG_HR] & 0x3F); /* rtc hr 0-23 */
-       tm->tm_mday = bcd2bin(buf[PCF2127_REG_DM] & 0x3F);
-       tm->tm_wday = buf[PCF2127_REG_DW] & 0x07;
-       tm->tm_mon = bcd2bin(buf[PCF2127_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
-       tm->tm_year = bcd2bin(buf[PCF2127_REG_YR]);
+               __func__, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6]);
+
+       tm->tm_sec = bcd2bin(buf[0] & 0x7F);
+       tm->tm_min = bcd2bin(buf[1] & 0x7F);
+       tm->tm_hour = bcd2bin(buf[2] & 0x3F);
+       tm->tm_mday = bcd2bin(buf[3] & 0x3F);
+       tm->tm_wday = buf[4] & 0x07;
+       tm->tm_mon = bcd2bin(buf[5] & 0x1F) - 1;
+       tm->tm_year = bcd2bin(buf[6]);
        tm->tm_year += 100;
 
        dev_dbg(dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
@@ -198,14 +289,45 @@ static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm)
        /* year */
        buf[i++] = bin2bcd(tm->tm_year - 100);
 
-       /* write register's data */
-       err = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_SC, buf, i);
+       /* Write access to time registers:
+        * PCF2127/29: no special action required.
+        * PCF2131:    requires setting the STOP and CPR bits. STOP bit needs to
+        *             be cleared after time registers are updated.
+        */
+       if (pcf2127->cfg->type == PCF2131) {
+               err = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+                                        PCF2127_BIT_CTRL1_STOP,
+                                        PCF2127_BIT_CTRL1_STOP);
+               if (err) {
+                       dev_dbg(dev, "setting STOP bit failed\n");
+                       return err;
+               }
+
+               err = regmap_write(pcf2127->regmap, PCF2131_REG_SR_RESET,
+                                  PCF2131_SR_RESET_CPR_CMD);
+               if (err) {
+                       dev_dbg(dev, "sending CPR cmd failed\n");
+                       return err;
+               }
+       }
+
+       /* write time register's data */
+       err = regmap_bulk_write(pcf2127->regmap, pcf2127->cfg->reg_time_base, buf, i);
        if (err) {
-               dev_err(dev,
-                       "%s: err=%d", __func__, err);
+               dev_dbg(dev, "%s: err=%d", __func__, err);
                return err;
        }
 
+       if (pcf2127->cfg->type == PCF2131) {
+               /* Clear STOP bit (PCF2131 only) after write is completed. */
+               err = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+                                        PCF2127_BIT_CTRL1_STOP, 0);
+               if (err) {
+                       dev_dbg(dev, "clearing STOP bit failed\n");
+                       return err;
+               }
+       }
+
        return 0;
 }
 
@@ -275,9 +397,16 @@ static int pcf2127_nvmem_write(void *priv, unsigned int offset,
 
 static int pcf2127_wdt_ping(struct watchdog_device *wdd)
 {
+       int wd_val;
        struct pcf2127 *pcf2127 = watchdog_get_drvdata(wdd);
 
-       return regmap_write(pcf2127->regmap, PCF2127_REG_WD_VAL, wdd->timeout);
+       /*
+        * Compute counter value of WATCHDG_TIM_VAL to obtain desired period
+        * in seconds, depending on the source clock frequency.
+        */
+       wd_val = ((wdd->timeout * pcf2127->cfg->wdd_clock_hz_x1000) / 1000) + 1;
+
+       return regmap_write(pcf2127->regmap, pcf2127->cfg->reg_wd_val, wd_val);
 }
 
 /*
@@ -311,7 +440,7 @@ static int pcf2127_wdt_stop(struct watchdog_device *wdd)
 {
        struct pcf2127 *pcf2127 = watchdog_get_drvdata(wdd);
 
-       return regmap_write(pcf2127->regmap, PCF2127_REG_WD_VAL,
+       return regmap_write(pcf2127->regmap, pcf2127->cfg->reg_wd_val,
                            PCF2127_WD_VAL_STOP);
 }
 
@@ -339,9 +468,25 @@ static const struct watchdog_ops pcf2127_watchdog_ops = {
        .set_timeout = pcf2127_wdt_set_timeout,
 };
 
+/*
+ * Compute watchdog period, t, in seconds, from the WATCHDG_TIM_VAL register
+ * value, n, and the clock frequency, f1000, in Hz x 1000.
+ *
+ * The PCF2127/29 datasheet gives t as:
+ *   t = n / f
+ * The PCF2131 datasheet gives t as:
+ *   t = (n - 1) / f
+ * For both variants, the watchdog is triggered when the WATCHDG_TIM_VAL reaches
+ * the value 1, and not zero. Consequently, the equation from the PCF2131
+ * datasheet seems to be the correct one for both variants.
+ */
+static int pcf2127_watchdog_get_period(int n, int f1000)
+{
+       return (1000 * (n - 1)) / f1000;
+}
+
 static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127)
 {
-       u32 wdd_timeout;
        int ret;
 
        if (!IS_ENABLED(CONFIG_WATCHDOG) ||
@@ -351,21 +496,35 @@ static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127)
        pcf2127->wdd.parent = dev;
        pcf2127->wdd.info = &pcf2127_wdt_info;
        pcf2127->wdd.ops = &pcf2127_watchdog_ops;
-       pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN;
-       pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX;
-       pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT;
-       pcf2127->wdd.min_hw_heartbeat_ms = 500;
+
+       pcf2127->wdd.min_timeout =
+               pcf2127_watchdog_get_period(
+                       2, pcf2127->cfg->wdd_clock_hz_x1000);
+       pcf2127->wdd.max_timeout =
+               pcf2127_watchdog_get_period(
+                       255, pcf2127->cfg->wdd_clock_hz_x1000);
+       pcf2127->wdd.timeout = PCF2127_WD_DEFAULT_TIMEOUT_S;
+
+       dev_dbg(dev, "%s clock = %d Hz / 1000\n", __func__,
+               pcf2127->cfg->wdd_clock_hz_x1000);
+
+       pcf2127->wdd.min_hw_heartbeat_ms = pcf2127->cfg->wdd_min_hw_heartbeat_ms;
        pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
 
        watchdog_set_drvdata(&pcf2127->wdd, pcf2127);
 
        /* Test if watchdog timer is started by bootloader */
-       ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout);
-       if (ret)
-               return ret;
+       if (pcf2127->cfg->wd_val_reg_readable) {
+               u32 wdd_timeout;
 
-       if (wdd_timeout)
-               set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status);
+               ret = regmap_read(pcf2127->regmap, pcf2127->cfg->reg_wd_val,
+                                 &wdd_timeout);
+               if (ret)
+                       return ret;
+
+               if (wdd_timeout)
+                       set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status);
+       }
 
        return devm_watchdog_register_device(dev, &pcf2127->wdd);
 }
@@ -386,8 +545,8 @@ static int pcf2127_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        if (ret)
                return ret;
 
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf,
-                              sizeof(buf));
+       ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->regs_alarm_base,
+                              buf, sizeof(buf));
        if (ret)
                return ret;
 
@@ -437,8 +596,8 @@ static int pcf2127_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        buf[3] = bin2bcd(alrm->time.tm_mday);
        buf[4] = PCF2127_BIT_ALARM_AE; /* Do not match on week day */
 
-       ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf,
-                               sizeof(buf));
+       ret = regmap_bulk_write(pcf2127->regmap, pcf2127->cfg->regs_alarm_base,
+                               buf, sizeof(buf));
        if (ret)
                return ret;
 
@@ -446,38 +605,35 @@ static int pcf2127_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 }
 
 /*
- * This function reads ctrl2 register, caller is responsible for calling
- * pcf2127_wdt_active_ping()
+ * This function reads the data of one timestamp function; the caller is
+ * responsible for calling pcf2127_wdt_active_ping()
  */
-static int pcf2127_rtc_ts_read(struct device *dev, time64_t *ts)
+static int pcf2127_rtc_ts_read(struct device *dev, time64_t *ts,
+                              int ts_id)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
        struct rtc_time tm;
        int ret;
-       unsigned char data[25];
+       unsigned char data[7];
 
-       ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL1, data,
-                              sizeof(data));
+       ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->ts[ts_id].reg_base,
+                              data, sizeof(data));
        if (ret) {
                dev_err(dev, "%s: read error ret=%d\n", __func__, ret);
                return ret;
        }
 
        dev_dbg(dev,
-               "%s: raw data is cr1=%02x, cr2=%02x, cr3=%02x, ts_sc=%02x, ts_mn=%02x, ts_hr=%02x, ts_dm=%02x, ts_mo=%02x, ts_yr=%02x\n",
-               __func__, data[PCF2127_REG_CTRL1], data[PCF2127_REG_CTRL2],
-               data[PCF2127_REG_CTRL3], data[PCF2127_REG_TS_SC],
-               data[PCF2127_REG_TS_MN], data[PCF2127_REG_TS_HR],
-               data[PCF2127_REG_TS_DM], data[PCF2127_REG_TS_MO],
-               data[PCF2127_REG_TS_YR]);
-
-       tm.tm_sec = bcd2bin(data[PCF2127_REG_TS_SC] & 0x7F);
-       tm.tm_min = bcd2bin(data[PCF2127_REG_TS_MN] & 0x7F);
-       tm.tm_hour = bcd2bin(data[PCF2127_REG_TS_HR] & 0x3F);
-       tm.tm_mday = bcd2bin(data[PCF2127_REG_TS_DM] & 0x3F);
+               "%s: raw data is ts_sc=%02x, ts_mn=%02x, ts_hr=%02x, ts_dm=%02x, ts_mo=%02x, ts_yr=%02x\n",
+               __func__, data[1], data[2], data[3], data[4], data[5], data[6]);
+
+       tm.tm_sec = bcd2bin(data[1] & 0x7F);
+       tm.tm_min = bcd2bin(data[2] & 0x7F);
+       tm.tm_hour = bcd2bin(data[3] & 0x3F);
+       tm.tm_mday = bcd2bin(data[4] & 0x3F);
        /* TS_MO register (month) value range: 1-12 */
-       tm.tm_mon = bcd2bin(data[PCF2127_REG_TS_MO] & 0x1F) - 1;
-       tm.tm_year = bcd2bin(data[PCF2127_REG_TS_YR]);
+       tm.tm_mon = bcd2bin(data[5] & 0x1F) - 1;
+       tm.tm_year = bcd2bin(data[6]);
        if (tm.tm_year < 70)
                tm.tm_year += 100; /* assume we are in 1970...2069 */
 
@@ -491,47 +647,84 @@ static int pcf2127_rtc_ts_read(struct device *dev, time64_t *ts)
        return 0;
 };
 
-static void pcf2127_rtc_ts_snapshot(struct device *dev)
+static void pcf2127_rtc_ts_snapshot(struct device *dev, int ts_id)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
        int ret;
 
+       if (ts_id >= pcf2127->cfg->ts_count)
+               return;
+
        /* Let userspace read the first timestamp */
-       if (pcf2127->ts_valid)
+       if (pcf2127->ts_valid[ts_id])
                return;
 
-       ret = pcf2127_rtc_ts_read(dev, &pcf2127->ts);
+       ret = pcf2127_rtc_ts_read(dev, &pcf2127->ts[ts_id], ts_id);
        if (!ret)
-               pcf2127->ts_valid = true;
+               pcf2127->ts_valid[ts_id] = true;
 }
 
 static irqreturn_t pcf2127_rtc_irq(int irq, void *dev)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
-       unsigned int ctrl1, ctrl2;
+       unsigned int ctrl2;
        int ret = 0;
 
-       ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
-       if (ret)
-               return IRQ_NONE;
-
        ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2);
        if (ret)
                return IRQ_NONE;
 
-       if (!(ctrl1 & PCF2127_CTRL1_IRQ_MASK || ctrl2 & PCF2127_CTRL2_IRQ_MASK))
-               return IRQ_NONE;
+       if (pcf2127->cfg->ts_count == 1) {
+               /* PCF2127/29 */
+               unsigned int ctrl1;
+
+               ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
+               if (ret)
+                       return IRQ_NONE;
+
+               if (!(ctrl1 & PCF2127_CTRL1_IRQ_MASK || ctrl2 & PCF2127_CTRL2_IRQ_MASK))
+                       return IRQ_NONE;
+
+               if (ctrl1 & PCF2127_BIT_CTRL1_TSF1 || ctrl2 & PCF2127_BIT_CTRL2_TSF2)
+                       pcf2127_rtc_ts_snapshot(dev, 0);
+
+               if (ctrl1 & PCF2127_CTRL1_IRQ_MASK)
+                       regmap_write(pcf2127->regmap, PCF2127_REG_CTRL1,
+                                    ctrl1 & ~PCF2127_CTRL1_IRQ_MASK);
+
+               if (ctrl2 & PCF2127_CTRL2_IRQ_MASK)
+                       regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
+                                    ctrl2 & ~PCF2127_CTRL2_IRQ_MASK);
+       } else {
+               /* PCF2131. */
+               unsigned int ctrl4;
+
+               ret = regmap_read(pcf2127->regmap, PCF2131_REG_CTRL4, &ctrl4);
+               if (ret)
+                       return IRQ_NONE;
+
+               if (!(ctrl4 & PCF2131_CTRL4_IRQ_MASK || ctrl2 & PCF2131_CTRL2_IRQ_MASK))
+                       return IRQ_NONE;
 
-       if (ctrl1 & PCF2127_BIT_CTRL1_TSF1 || ctrl2 & PCF2127_BIT_CTRL2_TSF2)
-               pcf2127_rtc_ts_snapshot(dev);
+               if (ctrl4 & PCF2131_CTRL4_IRQ_MASK) {
+                       int i;
+                       int tsf_bit = PCF2131_BIT_CTRL4_TSF1; /* Start at bit 7. */
 
-       if (ctrl1 & PCF2127_CTRL1_IRQ_MASK)
-               regmap_write(pcf2127->regmap, PCF2127_REG_CTRL1,
-                       ctrl1 & ~PCF2127_CTRL1_IRQ_MASK);
+                       for (i = 0; i < pcf2127->cfg->ts_count; i++) {
+                               if (ctrl4 & tsf_bit)
+                                       pcf2127_rtc_ts_snapshot(dev, i);
 
-       if (ctrl2 & PCF2127_CTRL2_IRQ_MASK)
-               regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
-                       ctrl2 & ~PCF2127_CTRL2_IRQ_MASK);
+                               tsf_bit = tsf_bit >> 1;
+                       }
+
+                       regmap_write(pcf2127->regmap, PCF2131_REG_CTRL4,
+                                    ctrl4 & ~PCF2131_CTRL4_IRQ_MASK);
+               }
+
+               if (ctrl2 & PCF2131_CTRL2_IRQ_MASK)
+                       regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
+                                    ctrl2 & ~PCF2131_CTRL2_IRQ_MASK);
+       }
 
        if (ctrl2 & PCF2127_BIT_CTRL2_AF)
                rtc_update_irq(pcf2127->rtc, 1, RTC_IRQF | RTC_AF);
@@ -552,28 +745,41 @@ static const struct rtc_class_ops pcf2127_rtc_ops = {
 
 /* sysfs interface */
 
-static ssize_t timestamp0_store(struct device *dev,
-                               struct device_attribute *attr,
-                               const char *buf, size_t count)
+static ssize_t timestamp_store(struct device *dev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t count, int ts_id)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev->parent);
        int ret;
 
+       if (ts_id >= pcf2127->cfg->ts_count)
+               return 0;
+
        if (pcf2127->irq_enabled) {
-               pcf2127->ts_valid = false;
+               pcf2127->ts_valid[ts_id] = false;
        } else {
-               ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
-                       PCF2127_BIT_CTRL1_TSF1, 0);
+               /* Always clear GND interrupt bit. */
+               ret = regmap_update_bits(pcf2127->regmap,
+                                        pcf2127->cfg->ts[ts_id].gnd_detect_reg,
+                                        pcf2127->cfg->ts[ts_id].gnd_detect_bit,
+                                        0);
+
                if (ret) {
-                       dev_err(dev, "%s: update ctrl1 ret=%d\n", __func__, ret);
+                       dev_err(dev, "%s: update TS gnd detect ret=%d\n", __func__, ret);
                        return ret;
                }
 
-               ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2,
-                       PCF2127_BIT_CTRL2_TSF2, 0);
-               if (ret) {
-                       dev_err(dev, "%s: update ctrl2 ret=%d\n", __func__, ret);
-                       return ret;
+               if (pcf2127->cfg->ts[ts_id].inter_detect_bit) {
+                       /* Clear intermediate level interrupt bit if supported. */
+                       ret = regmap_update_bits(pcf2127->regmap,
+                                                pcf2127->cfg->ts[ts_id].inter_detect_reg,
+                                                pcf2127->cfg->ts[ts_id].inter_detect_bit,
+                                                0);
+                       if (ret) {
+                               dev_err(dev, "%s: update TS intermediate level detect ret=%d\n",
+                                       __func__, ret);
+                               return ret;
+                       }
                }
 
                ret = pcf2127_wdt_active_ping(&pcf2127->wdd);
@@ -582,34 +788,84 @@ static ssize_t timestamp0_store(struct device *dev,
        }
 
        return count;
+}
+
+static ssize_t timestamp0_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       return timestamp_store(dev, attr, buf, count, 0);
 };
 
-static ssize_t timestamp0_show(struct device *dev,
-                              struct device_attribute *attr, char *buf)
+static ssize_t timestamp1_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       return timestamp_store(dev, attr, buf, count, 1);
+};
+
+static ssize_t timestamp2_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       return timestamp_store(dev, attr, buf, count, 2);
+};
+
+static ssize_t timestamp3_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       return timestamp_store(dev, attr, buf, count, 3);
+};
+
+static ssize_t timestamp_show(struct device *dev,
+                             struct device_attribute *attr, char *buf,
+                             int ts_id)
 {
        struct pcf2127 *pcf2127 = dev_get_drvdata(dev->parent);
-       unsigned int ctrl1, ctrl2;
        int ret;
        time64_t ts;
 
+       if (ts_id >= pcf2127->cfg->ts_count)
+               return 0;
+
        if (pcf2127->irq_enabled) {
-               if (!pcf2127->ts_valid)
+               if (!pcf2127->ts_valid[ts_id])
                        return 0;
-               ts = pcf2127->ts;
+               ts = pcf2127->ts[ts_id];
        } else {
-               ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
-               if (ret)
-                       return 0;
+               u8 valid_low = 0;
+               u8 valid_inter = 0;
+               unsigned int ctrl;
 
-               ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2);
+               /* Check if TS input pin is driven to GND, supported by all
+                * variants.
+                */
+               ret = regmap_read(pcf2127->regmap,
+                                 pcf2127->cfg->ts[ts_id].gnd_detect_reg,
+                                 &ctrl);
                if (ret)
                        return 0;
 
-               if (!(ctrl1 & PCF2127_BIT_CTRL1_TSF1) &&
-                   !(ctrl2 & PCF2127_BIT_CTRL2_TSF2))
+               valid_low = ctrl & pcf2127->cfg->ts[ts_id].gnd_detect_bit;
+
+               if (pcf2127->cfg->ts[ts_id].inter_detect_bit) {
+                       /* Check if TS input pin is driven to intermediate level
+                        * between GND and supply, if supported by variant.
+                        */
+                       ret = regmap_read(pcf2127->regmap,
+                                         pcf2127->cfg->ts[ts_id].inter_detect_reg,
+                                         &ctrl);
+                       if (ret)
+                               return 0;
+
+                       valid_inter = ctrl & pcf2127->cfg->ts[ts_id].inter_detect_bit;
+               }
+
+               if (!valid_low && !valid_inter)
                        return 0;
 
-               ret = pcf2127_rtc_ts_read(dev->parent, &ts);
+               ret = pcf2127_rtc_ts_read(dev->parent, &ts, ts_id);
                if (ret)
                        return 0;
 
@@ -618,21 +874,227 @@ static ssize_t timestamp0_show(struct device *dev,
                        return ret;
        }
        return sprintf(buf, "%llu\n", (unsigned long long)ts);
+}
+
+static ssize_t timestamp0_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return timestamp_show(dev, attr, buf, 0);
+};
+
+static ssize_t timestamp1_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return timestamp_show(dev, attr, buf, 1);
+};
+
+static ssize_t timestamp2_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return timestamp_show(dev, attr, buf, 2);
+};
+
+static ssize_t timestamp3_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return timestamp_show(dev, attr, buf, 3);
 };
 
 static DEVICE_ATTR_RW(timestamp0);
+static DEVICE_ATTR_RW(timestamp1);
+static DEVICE_ATTR_RW(timestamp2);
+static DEVICE_ATTR_RW(timestamp3);
 
 static struct attribute *pcf2127_attrs[] = {
        &dev_attr_timestamp0.attr,
        NULL
 };
 
-static const struct attribute_group pcf2127_attr_group = {
-       .attrs  = pcf2127_attrs,
+static struct attribute *pcf2131_attrs[] = {
+       &dev_attr_timestamp0.attr,
+       &dev_attr_timestamp1.attr,
+       &dev_attr_timestamp2.attr,
+       &dev_attr_timestamp3.attr,
+       NULL
 };
 
+static struct pcf21xx_config pcf21xx_cfg[] = {
+       [PCF2127] = {
+               .type = PCF2127,
+               .max_register = 0x1d,
+               .has_nvmem = 1,
+               .has_bit_wd_ctl_cd0 = 1,
+               .wd_val_reg_readable = 1,
+               .has_int_a_b = 0,
+               .reg_time_base = PCF2127_REG_TIME_BASE,
+               .regs_alarm_base = PCF2127_REG_ALARM_BASE,
+               .reg_wd_ctl = PCF2127_REG_WD_CTL,
+               .reg_wd_val = PCF2127_REG_WD_VAL,
+               .reg_clkout = PCF2127_REG_CLKOUT,
+               .wdd_clock_hz_x1000 = PCF2127_WD_CLOCK_HZ_X1000,
+               .wdd_min_hw_heartbeat_ms = PCF2127_WD_MIN_HW_HEARTBEAT_MS,
+               .ts_count = 1,
+               .ts[0] = {
+                       .reg_base  = PCF2127_REG_TS1_BASE,
+                       .gnd_detect_reg = PCF2127_REG_CTRL1,
+                       .gnd_detect_bit = PCF2127_BIT_CTRL1_TSF1,
+                       .inter_detect_reg = PCF2127_REG_CTRL2,
+                       .inter_detect_bit = PCF2127_BIT_CTRL2_TSF2,
+                       .ie_reg    = PCF2127_REG_CTRL2,
+                       .ie_bit    = PCF2127_BIT_CTRL2_TSIE,
+               },
+               .attribute_group = {
+                       .attrs  = pcf2127_attrs,
+               },
+       },
+       [PCF2129] = {
+               .type = PCF2129,
+               .max_register = 0x19,
+               .has_nvmem = 0,
+               .has_bit_wd_ctl_cd0 = 0,
+               .wd_val_reg_readable = 1,
+               .has_int_a_b = 0,
+               .reg_time_base = PCF2127_REG_TIME_BASE,
+               .regs_alarm_base = PCF2127_REG_ALARM_BASE,
+               .reg_wd_ctl = PCF2127_REG_WD_CTL,
+               .reg_wd_val = PCF2127_REG_WD_VAL,
+               .reg_clkout = PCF2127_REG_CLKOUT,
+               .wdd_clock_hz_x1000 = PCF2127_WD_CLOCK_HZ_X1000,
+               .wdd_min_hw_heartbeat_ms = PCF2127_WD_MIN_HW_HEARTBEAT_MS,
+               .ts_count = 1,
+               .ts[0] = {
+                       .reg_base  = PCF2127_REG_TS1_BASE,
+                       .gnd_detect_reg = PCF2127_REG_CTRL1,
+                       .gnd_detect_bit = PCF2127_BIT_CTRL1_TSF1,
+                       .inter_detect_reg = PCF2127_REG_CTRL2,
+                       .inter_detect_bit = PCF2127_BIT_CTRL2_TSF2,
+                       .ie_reg    = PCF2127_REG_CTRL2,
+                       .ie_bit    = PCF2127_BIT_CTRL2_TSIE,
+               },
+               .attribute_group = {
+                       .attrs  = pcf2127_attrs,
+               },
+       },
+       [PCF2131] = {
+               .type = PCF2131,
+               .max_register = 0x36,
+               .has_nvmem = 0,
+               .has_bit_wd_ctl_cd0 = 0,
+               .wd_val_reg_readable = 0,
+               .has_int_a_b = 1,
+               .reg_time_base = PCF2131_REG_TIME_BASE,
+               .regs_alarm_base = PCF2131_REG_ALARM_BASE,
+               .reg_wd_ctl = PCF2131_REG_WD_CTL,
+               .reg_wd_val = PCF2131_REG_WD_VAL,
+               .reg_clkout = PCF2131_REG_CLKOUT,
+               .wdd_clock_hz_x1000 = PCF2131_WD_CLOCK_HZ_X1000,
+               .wdd_min_hw_heartbeat_ms = PCF2131_WD_MIN_HW_HEARTBEAT_MS,
+               .ts_count = 4,
+               .ts[0] = {
+                       .reg_base  = PCF2131_REG_TS1_BASE,
+                       .gnd_detect_reg = PCF2131_REG_CTRL4,
+                       .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF1,
+                       .inter_detect_bit = 0,
+                       .ie_reg    = PCF2131_REG_CTRL5,
+                       .ie_bit    = PCF2131_BIT_CTRL5_TSIE1,
+               },
+               .ts[1] = {
+                       .reg_base  = PCF2131_REG_TS2_BASE,
+                       .gnd_detect_reg = PCF2131_REG_CTRL4,
+                       .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF2,
+                       .inter_detect_bit = 0,
+                       .ie_reg    = PCF2131_REG_CTRL5,
+                       .ie_bit    = PCF2131_BIT_CTRL5_TSIE2,
+               },
+               .ts[2] = {
+                       .reg_base  = PCF2131_REG_TS3_BASE,
+                       .gnd_detect_reg = PCF2131_REG_CTRL4,
+                       .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF3,
+                       .inter_detect_bit = 0,
+                       .ie_reg    = PCF2131_REG_CTRL5,
+                       .ie_bit    = PCF2131_BIT_CTRL5_TSIE3,
+               },
+               .ts[3] = {
+                       .reg_base  = PCF2131_REG_TS4_BASE,
+                       .gnd_detect_reg = PCF2131_REG_CTRL4,
+                       .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF4,
+                       .inter_detect_bit = 0,
+                       .ie_reg    = PCF2131_REG_CTRL5,
+                       .ie_bit    = PCF2131_BIT_CTRL5_TSIE4,
+               },
+               .attribute_group = {
+                       .attrs  = pcf2131_attrs,
+               },
+       },
+};
+
+/*
+ * Enable timestamp function and corresponding interrupt(s).
+ */
+static int pcf2127_enable_ts(struct device *dev, int ts_id)
+{
+       struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+       int ret;
+
+       if (ts_id >= pcf2127->cfg->ts_count) {
+               dev_err(dev, "%s: invalid tamper detection ID (%d)\n",
+                       __func__, ts_id);
+               return -EINVAL;
+       }
+
+       /* Enable timestamp function. */
+       ret = regmap_update_bits(pcf2127->regmap,
+                                pcf2127->cfg->ts[ts_id].reg_base,
+                                PCF2127_BIT_TS_CTRL_TSOFF |
+                                PCF2127_BIT_TS_CTRL_TSM,
+                                PCF2127_BIT_TS_CTRL_TSM);
+       if (ret) {
+               dev_err(dev, "%s: tamper detection config (ts%d_ctrl) failed\n",
+                       __func__, ts_id);
+               return ret;
+       }
+
+       /*
+        * Enable interrupt generation when TSF timestamp flag is set.
+        * Interrupt signals are open-drain outputs and can be left floating if
+        * unused.
+        */
+       ret = regmap_update_bits(pcf2127->regmap, pcf2127->cfg->ts[ts_id].ie_reg,
+                                pcf2127->cfg->ts[ts_id].ie_bit,
+                                pcf2127->cfg->ts[ts_id].ie_bit);
+       if (ret) {
+               dev_err(dev, "%s: tamper detection TSIE%d config failed\n",
+                       __func__, ts_id);
+               return ret;
+       }
+
+       return ret;
+}
+
+/* Route all interrupt sources to INT A pin. */
+static int pcf2127_configure_interrupt_pins(struct device *dev)
+{
+       struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+       int ret;
+
+       /* Mask bits need to be cleared to enable corresponding
+        * interrupt source.
+        */
+       ret = regmap_write(pcf2127->regmap,
+                          PCF2131_REG_INT_A_MASK1, 0);
+       if (ret)
+               return ret;
+
+       ret = regmap_write(pcf2127->regmap,
+                          PCF2131_REG_INT_A_MASK2, 0);
+       if (ret)
+               return ret;
+
+       return ret;
+}
+
 static int pcf2127_probe(struct device *dev, struct regmap *regmap,
-                        int alarm_irq, const char *name, bool is_pcf2127)
+                        int alarm_irq, const struct pcf21xx_config *config)
 {
        struct pcf2127 *pcf2127;
        int ret = 0;
@@ -645,6 +1107,7 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                return -ENOMEM;
 
        pcf2127->regmap = regmap;
+       pcf2127->cfg = config;
 
        dev_set_drvdata(dev, pcf2127);
 
@@ -656,8 +1119,16 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
        pcf2127->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        pcf2127->rtc->range_max = RTC_TIMESTAMP_END_2099;
        pcf2127->rtc->set_start_time = true; /* Sets actual start to 1970 */
-       set_bit(RTC_FEATURE_ALARM_RES_2S, pcf2127->rtc->features);
-       clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf2127->rtc->features);
+
+       /*
+        * PCF2127/29 do not work correctly when setting alarms at 1s intervals.
+        * PCF2131 is ok.
+        */
+       if (pcf2127->cfg->type == PCF2127 || pcf2127->cfg->type == PCF2129) {
+               set_bit(RTC_FEATURE_ALARM_RES_2S, pcf2127->rtc->features);
+               clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf2127->rtc->features);
+       }
+
        clear_bit(RTC_FEATURE_ALARM, pcf2127->rtc->features);
 
        if (alarm_irq > 0) {
@@ -688,7 +1159,16 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
                set_bit(RTC_FEATURE_ALARM, pcf2127->rtc->features);
        }
 
-       if (is_pcf2127) {
+       if (pcf2127->cfg->has_int_a_b) {
+               /* Configure int A/B pins, independently of alarm_irq. */
+               ret = pcf2127_configure_interrupt_pins(dev);
+               if (ret) {
+                       dev_err(dev, "failed to configure interrupt pins\n");
+                       return ret;
+               }
+       }
+
+       if (pcf2127->cfg->has_nvmem) {
                struct nvmem_config nvmem_cfg = {
                        .priv = pcf2127,
                        .reg_read = pcf2127_nvmem_read,
@@ -703,15 +1183,17 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
         * The "Power-On Reset Override" facility prevents the RTC to do a reset
         * after power on. For normal operation the PORO must be disabled.
         */
-       regmap_clear_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+       ret = regmap_clear_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
                                PCF2127_BIT_CTRL1_POR_OVRD);
+       if (ret < 0)
+               return ret;
 
-       ret = regmap_read(pcf2127->regmap, PCF2127_REG_CLKOUT, &val);
+       ret = regmap_read(pcf2127->regmap, pcf2127->cfg->reg_clkout, &val);
        if (ret < 0)
                return ret;
 
        if (!(val & PCF2127_BIT_CLKOUT_OTPR)) {
-               ret = regmap_set_bits(pcf2127->regmap, PCF2127_REG_CLKOUT,
+               ret = regmap_set_bits(pcf2127->regmap, pcf2127->cfg->reg_clkout,
                                      PCF2127_BIT_CLKOUT_OTPR);
                if (ret < 0)
                        return ret;
@@ -721,20 +1203,20 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
 
        /*
         * Watchdog timer enabled and reset pin /RST activated when timed out.
-        * Select 1Hz clock source for watchdog timer.
+        * Select 1Hz clock source for watchdog timer (1/4Hz for PCF2131).
         * Note: Countdown timer disabled and not available.
-        * For pca2129, pcf2129, only bit[7] is for Symbol WD_CD
+        * For pca2129, pcf2129 and pcf2131, only bit[7] is for Symbol WD_CD
         * of register watchdg_tim_ctl. The bit[6] is labeled
         * as T. Bits labeled as T must always be written with
         * logic 0.
         */
-       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_WD_CTL,
+       ret = regmap_update_bits(pcf2127->regmap, pcf2127->cfg->reg_wd_ctl,
                                 PCF2127_BIT_WD_CTL_CD1 |
                                 PCF2127_BIT_WD_CTL_CD0 |
                                 PCF2127_BIT_WD_CTL_TF1 |
                                 PCF2127_BIT_WD_CTL_TF0,
                                 PCF2127_BIT_WD_CTL_CD1 |
-                                (is_pcf2127 ? PCF2127_BIT_WD_CTL_CD0 : 0) |
+                                (pcf2127->cfg->has_bit_wd_ctl_cd0 ? PCF2127_BIT_WD_CTL_CD0 : 0) |
                                 PCF2127_BIT_WD_CTL_TF1);
        if (ret) {
                dev_err(dev, "%s: watchdog config (wd_ctl) failed\n", __func__);
@@ -760,34 +1242,15 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
        }
 
        /*
-        * Enable timestamp function and store timestamp of first trigger
-        * event until TSF1 and TSF2 interrupt flags are cleared.
-        */
-       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_TS_CTRL,
-                                PCF2127_BIT_TS_CTRL_TSOFF |
-                                PCF2127_BIT_TS_CTRL_TSM,
-                                PCF2127_BIT_TS_CTRL_TSM);
-       if (ret) {
-               dev_err(dev, "%s: tamper detection config (ts_ctrl) failed\n",
-                       __func__);
-               return ret;
-       }
-
-       /*
-        * Enable interrupt generation when TSF1 or TSF2 timestamp flags
-        * are set. Interrupt signal is an open-drain output and can be
-        * left floating if unused.
+        * Enable timestamp functions 1 to 4.
         */
-       ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2,
-                                PCF2127_BIT_CTRL2_TSIE,
-                                PCF2127_BIT_CTRL2_TSIE);
-       if (ret) {
-               dev_err(dev, "%s: tamper detection config (ctrl2) failed\n",
-                       __func__);
-               return ret;
+       for (int i = 0; i < pcf2127->cfg->ts_count; i++) {
+               ret = pcf2127_enable_ts(dev, i);
+               if (ret)
+                       return ret;
        }
 
-       ret = rtc_add_group(pcf2127->rtc, &pcf2127_attr_group);
+       ret = rtc_add_group(pcf2127->rtc, &pcf2127->cfg->attribute_group);
        if (ret) {
                dev_err(dev, "%s: tamper sysfs registering failed\n",
                        __func__);
@@ -799,9 +1262,10 @@ static int pcf2127_probe(struct device *dev, struct regmap *regmap,
 
 #ifdef CONFIG_OF
 static const struct of_device_id pcf2127_of_match[] = {
-       { .compatible = "nxp,pcf2127" },
-       { .compatible = "nxp,pcf2129" },
-       { .compatible = "nxp,pca2129" },
+       { .compatible = "nxp,pcf2127", .data = &pcf21xx_cfg[PCF2127] },
+       { .compatible = "nxp,pcf2129", .data = &pcf21xx_cfg[PCF2129] },
+       { .compatible = "nxp,pca2129", .data = &pcf21xx_cfg[PCF2129] },
+       { .compatible = "nxp,pcf2131", .data = &pcf21xx_cfg[PCF2131] },
        {}
 };
 MODULE_DEVICE_TABLE(of, pcf2127_of_match);
@@ -886,26 +1350,41 @@ static const struct regmap_bus pcf2127_i2c_regmap = {
 static struct i2c_driver pcf2127_i2c_driver;
 
 static const struct i2c_device_id pcf2127_i2c_id[] = {
-       { "pcf2127", 1 },
-       { "pcf2129", 0 },
-       { "pca2129", 0 },
+       { "pcf2127", PCF2127 },
+       { "pcf2129", PCF2129 },
+       { "pca2129", PCF2129 },
+       { "pcf2131", PCF2131 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id);
 
 static int pcf2127_i2c_probe(struct i2c_client *client)
 {
-       const struct i2c_device_id *id = i2c_match_id(pcf2127_i2c_id, client);
        struct regmap *regmap;
-       static const struct regmap_config config = {
+       static struct regmap_config config = {
                .reg_bits = 8,
                .val_bits = 8,
-               .max_register = 0x1d,
        };
+       const struct pcf21xx_config *variant;
 
        if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                return -ENODEV;
 
+       if (client->dev.of_node) {
+               variant = of_device_get_match_data(&client->dev);
+               if (!variant)
+                       return -ENODEV;
+       } else {
+               enum pcf21xx_type type =
+                       i2c_match_id(pcf2127_i2c_id, client)->driver_data;
+
+               if (type >= PCF21XX_LAST_ID)
+                       return -ENODEV;
+               variant = &pcf21xx_cfg[type];
+       }
+
+       config.max_register = variant->max_register,
+
        regmap = devm_regmap_init(&client->dev, &pcf2127_i2c_regmap,
                                        &client->dev, &config);
        if (IS_ERR(regmap)) {
@@ -914,8 +1393,7 @@ static int pcf2127_i2c_probe(struct i2c_client *client)
                return PTR_ERR(regmap);
        }
 
-       return pcf2127_probe(&client->dev, regmap, client->irq,
-                            pcf2127_i2c_driver.driver.name, id->driver_data);
+       return pcf2127_probe(&client->dev, regmap, client->irq, variant);
 }
 
 static struct i2c_driver pcf2127_i2c_driver = {
@@ -953,17 +1431,32 @@ static void pcf2127_i2c_unregister_driver(void)
 #if IS_ENABLED(CONFIG_SPI_MASTER)
 
 static struct spi_driver pcf2127_spi_driver;
+static const struct spi_device_id pcf2127_spi_id[];
 
 static int pcf2127_spi_probe(struct spi_device *spi)
 {
-       static const struct regmap_config config = {
+       static struct regmap_config config = {
                .reg_bits = 8,
                .val_bits = 8,
                .read_flag_mask = 0xa0,
                .write_flag_mask = 0x20,
-               .max_register = 0x1d,
        };
        struct regmap *regmap;
+       const struct pcf21xx_config *variant;
+
+       if (spi->dev.of_node) {
+               variant = of_device_get_match_data(&spi->dev);
+               if (!variant)
+                       return -ENODEV;
+       } else {
+               enum pcf21xx_type type = spi_get_device_id(spi)->driver_data;
+
+               if (type >= PCF21XX_LAST_ID)
+                       return -ENODEV;
+               variant = &pcf21xx_cfg[type];
+       }
+
+       config.max_register = variant->max_register,
 
        regmap = devm_regmap_init_spi(spi, &config);
        if (IS_ERR(regmap)) {
@@ -972,15 +1465,14 @@ static int pcf2127_spi_probe(struct spi_device *spi)
                return PTR_ERR(regmap);
        }
 
-       return pcf2127_probe(&spi->dev, regmap, spi->irq,
-                            pcf2127_spi_driver.driver.name,
-                            spi_get_device_id(spi)->driver_data);
+       return pcf2127_probe(&spi->dev, regmap, spi->irq, variant);
 }
 
 static const struct spi_device_id pcf2127_spi_id[] = {
-       { "pcf2127", 1 },
-       { "pcf2129", 0 },
-       { "pca2129", 0 },
+       { "pcf2127", PCF2127 },
+       { "pcf2129", PCF2129 },
+       { "pca2129", PCF2129 },
+       { "pcf2131", PCF2131 },
        { }
 };
 MODULE_DEVICE_TABLE(spi, pcf2127_spi_id);
@@ -1045,5 +1537,5 @@ static void __exit pcf2127_exit(void)
 module_exit(pcf2127_exit)
 
 MODULE_AUTHOR("Renaud Cerrato <r.cerrato@til-technologies.fr>");
-MODULE_DESCRIPTION("NXP PCF2127/29 RTC driver");
+MODULE_DESCRIPTION("NXP PCF2127/29/31 RTC driver");
 MODULE_LICENSE("GPL v2");
index e517abf..fdbc07f 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/bcd.h>
 #include <linux/rtc.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/regmap.h>
 
@@ -514,49 +514,40 @@ static struct clk *pcf85063_clkout_register_clk(struct pcf85063 *pcf85063)
 }
 #endif
 
-enum pcf85063_type {
-       PCF85063,
-       PCF85063TP,
-       PCF85063A,
-       RV8263,
-       PCF85063_LAST_ID
+static const struct pcf85063_config config_pcf85063 = {
+       .regmap = {
+               .reg_bits = 8,
+               .val_bits = 8,
+               .max_register = 0x0a,
+       },
 };
 
-static struct pcf85063_config pcf85063_cfg[] = {
-       [PCF85063] = {
-               .regmap = {
-                       .reg_bits = 8,
-                       .val_bits = 8,
-                       .max_register = 0x0a,
-               },
-       },
-       [PCF85063TP] = {
-               .regmap = {
-                       .reg_bits = 8,
-                       .val_bits = 8,
-                       .max_register = 0x0a,
-               },
-       },
-       [PCF85063A] = {
-               .regmap = {
-                       .reg_bits = 8,
-                       .val_bits = 8,
-                       .max_register = 0x11,
-               },
-               .has_alarms = 1,
+static const struct pcf85063_config config_pcf85063tp = {
+       .regmap = {
+               .reg_bits = 8,
+               .val_bits = 8,
+               .max_register = 0x0a,
        },
-       [RV8263] = {
-               .regmap = {
-                       .reg_bits = 8,
-                       .val_bits = 8,
-                       .max_register = 0x11,
-               },
-               .has_alarms = 1,
-               .force_cap_7000 = 1,
+};
+
+static const struct pcf85063_config config_pcf85063a = {
+       .regmap = {
+               .reg_bits = 8,
+               .val_bits = 8,
+               .max_register = 0x11,
        },
+       .has_alarms = 1,
 };
 
-static const struct i2c_device_id pcf85063_ids[];
+/*
+ * Chip description used by the rv8263 match entries: 8-bit register map up
+ * to register 0x11, with the alarm and force_cap_7000 flags set.
+ */
+static const struct pcf85063_config config_rv8263 = {
+       .force_cap_7000 = 1,
+       .has_alarms = 1,
+       .regmap = {
+               .max_register = 0x11,
+               .val_bits = 8,
+               .reg_bits = 8,
+       },
+};
 
 static int pcf85063_probe(struct i2c_client *client)
 {
@@ -579,17 +570,9 @@ static int pcf85063_probe(struct i2c_client *client)
        if (!pcf85063)
                return -ENOMEM;
 
-       if (client->dev.of_node) {
-               config = of_device_get_match_data(&client->dev);
-               if (!config)
-                       return -ENODEV;
-       } else {
-               enum pcf85063_type type =
-                       i2c_match_id(pcf85063_ids, client)->driver_data;
-               if (type >= PCF85063_LAST_ID)
-                       return -ENODEV;
-               config = &pcf85063_cfg[type];
-       }
+       config = i2c_get_match_data(client);
+       if (!config)
+               return -ENODEV;
 
        pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap);
        if (IS_ERR(pcf85063->regmap))
@@ -655,22 +638,22 @@ static int pcf85063_probe(struct i2c_client *client)
 }
 
 static const struct i2c_device_id pcf85063_ids[] = {
-       { "pca85073a", PCF85063A },
-       { "pcf85063", PCF85063 },
-       { "pcf85063tp", PCF85063TP },
-       { "pcf85063a", PCF85063A },
-       { "rv8263", RV8263 },
+       { "pca85073a", .driver_data = (kernel_ulong_t)&config_pcf85063a },
+       { "pcf85063", .driver_data = (kernel_ulong_t)&config_pcf85063 },
+       { "pcf85063tp", .driver_data = (kernel_ulong_t)&config_pcf85063tp },
+       { "pcf85063a", .driver_data = (kernel_ulong_t)&config_pcf85063a },
+       { "rv8263", .driver_data = (kernel_ulong_t)&config_rv8263 },
        {}
 };
 MODULE_DEVICE_TABLE(i2c, pcf85063_ids);
 
 #ifdef CONFIG_OF
 static const struct of_device_id pcf85063_of_match[] = {
-       { .compatible = "nxp,pca85073a", .data = &pcf85063_cfg[PCF85063A] },
-       { .compatible = "nxp,pcf85063", .data = &pcf85063_cfg[PCF85063] },
-       { .compatible = "nxp,pcf85063tp", .data = &pcf85063_cfg[PCF85063TP] },
-       { .compatible = "nxp,pcf85063a", .data = &pcf85063_cfg[PCF85063A] },
-       { .compatible = "microcrystal,rv8263", .data = &pcf85063_cfg[RV8263] },
+       { .compatible = "nxp,pca85073a", .data = &config_pcf85063a },
+       { .compatible = "nxp,pcf85063", .data = &config_pcf85063 },
+       { .compatible = "nxp,pcf85063tp", .data = &config_pcf85063tp },
+       { .compatible = "nxp,pcf85063a", .data = &config_pcf85063a },
+       { .compatible = "microcrystal,rv8263", .data = &config_rv8263 },
        {}
 };
 MODULE_DEVICE_TABLE(of, pcf85063_of_match);
index 65b8b13..0619467 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/errno.h>
 #include <linux/bcd.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 
 /*
@@ -403,6 +402,7 @@ static int pcf85363_probe(struct i2c_client *client)
                },
        };
        int ret, i, err;
+       bool wakeup_source;
 
        if (data)
                config = data;
@@ -432,25 +432,36 @@ static int pcf85363_probe(struct i2c_client *client)
        pcf85363->rtc->ops = &rtc_ops;
        pcf85363->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
        pcf85363->rtc->range_max = RTC_TIMESTAMP_END_2099;
-       clear_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+
+       wakeup_source = device_property_read_bool(&client->dev,
+                                                 "wakeup-source");
+       if (client->irq > 0 || wakeup_source) {
+               regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
+               regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
+                                  PIN_IO_INTA_OUT, PIN_IO_INTAPM);
+       }
 
        if (client->irq > 0) {
                unsigned long irqflags = IRQF_TRIGGER_LOW;
 
                if (dev_fwnode(&client->dev))
                        irqflags = 0;
-
-               regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
-               regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
-                                  PIN_IO_INTA_OUT, PIN_IO_INTAPM);
                ret = devm_request_threaded_irq(&client->dev, client->irq,
                                                NULL, pcf85363_rtc_handle_irq,
                                                irqflags | IRQF_ONESHOT,
                                                "pcf85363", client);
-               if (ret)
-                       dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
-               else
-                       set_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+               if (ret) {
+                       dev_warn(&client->dev,
+                                "unable to request IRQ, alarms disabled\n");
+                       client->irq = 0;
+               }
+       }
+
+       if (client->irq > 0 || wakeup_source) {
+               device_init_wakeup(&client->dev, true);
+               set_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+       } else {
+               clear_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
        }
 
        ret = devm_rtc_register_device(pcf85363->rtc);
index eeacf48..e400c78 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 
 #include "rtc-sa1100.h"
 
index a5a6c87..f8fab02 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/bcd.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 /*
  * Ricoh has a family of I2C based RTCs, which differ only slightly from
@@ -826,8 +826,7 @@ static int rs5c372_probe(struct i2c_client *client)
        rs5c372->client = client;
        i2c_set_clientdata(client, rs5c372);
        if (client->dev.of_node) {
-               rs5c372->type = (enum rtc_type)
-                       of_device_get_match_data(&client->dev);
+               rs5c372->type = (uintptr_t)of_device_get_match_data(&client->dev);
        } else {
                const struct i2c_device_id *id = i2c_match_id(rs5c372_id, client);
                rs5c372->type = id->driver_data;
index 076e56f..2f001c5 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 
@@ -855,11 +855,68 @@ static const struct regmap_config regmap_config = {
         .max_register = 0x37,
 };
 
+/*
+ * Program the trickle charger bits of RV3028_BACKUP from device properties.
+ *
+ * The current TCE/TCR bits are read back first. The optional
+ * "trickle-resistor-ohms" property selects a resistor and enables the
+ * charger; the optional "aux-voltage-chargeable" property (0 or 1) then
+ * clears or sets the enable bit. rv3028_update_cfg() is only called when the
+ * resulting bits differ from the ones read back.
+ *
+ * Returns 0 on success, or the error reported by regmap_read() or
+ * rv3028_update_cfg(). The return type is int rather than u8 so that
+ * negative error codes are not truncated before reaching the caller.
+ */
+static int rv3028_set_trickle_charger(struct rv3028_data *rv3028,
+                                     struct i2c_client *client)
+{
+       /* regmap_read() stores through an unsigned int pointer */
+       unsigned int val_old, val;
+       u32 ohms, chargeable;
+       int ret;
+
+       ret = regmap_read(rv3028->regmap, RV3028_BACKUP, &val_old);
+       if (ret < 0)
+               return ret;
+
+       /* keep only the trickle charger bits */
+       val_old &= RV3028_BACKUP_TCE | RV3028_BACKUP_TCR_MASK;
+       val = val_old;
+
+       /* setup trickle charger */
+       if (!device_property_read_u32(&client->dev, "trickle-resistor-ohms",
+                                     &ohms)) {
+               int i;
+
+               for (i = 0; i < ARRAY_SIZE(rv3028_trickle_resistors); i++)
+                       if (ohms == rv3028_trickle_resistors[i])
+                               break;
+
+               if (i < ARRAY_SIZE(rv3028_trickle_resistors)) {
+                       /* enable trickle charger and its resistor */
+                       val = RV3028_BACKUP_TCE | i;
+               } else {
+                       dev_warn(&client->dev, "invalid trickle resistor value\n");
+               }
+       }
+
+       /* an explicit chargeable setting overrides the enable bit chosen above */
+       if (!device_property_read_u32(&client->dev, "aux-voltage-chargeable",
+                                     &chargeable)) {
+               switch (chargeable) {
+               case 0:
+                       val &= ~RV3028_BACKUP_TCE;
+                       break;
+               case 1:
+                       val |= RV3028_BACKUP_TCE;
+                       break;
+               default:
+                       dev_warn(&client->dev,
+                                "unsupported aux-voltage-chargeable value\n");
+                       break;
+               }
+       }
+
+       /* only update EEPROM if changes are necessary */
+       if (val_old == val)
+               return 0;
+
+       return rv3028_update_cfg(rv3028, RV3028_BACKUP,
+                                RV3028_BACKUP_TCE | RV3028_BACKUP_TCR_MASK,
+                                val);
+}
+
 static int rv3028_probe(struct i2c_client *client)
 {
        struct rv3028_data *rv3028;
        int ret, status;
-       u32 ohms;
        struct nvmem_config nvmem_cfg = {
                .name = "rv3028_nvram",
                .word_size = 1,
@@ -937,24 +994,9 @@ static int rv3028_probe(struct i2c_client *client)
        if (ret)
                return ret;
 
-       /* setup trickle charger */
-       if (!device_property_read_u32(&client->dev, "trickle-resistor-ohms",
-                                     &ohms)) {
-               int i;
-
-               for (i = 0; i < ARRAY_SIZE(rv3028_trickle_resistors); i++)
-                       if (ohms == rv3028_trickle_resistors[i])
-                               break;
-
-               if (i < ARRAY_SIZE(rv3028_trickle_resistors)) {
-                       ret = rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_TCE |
-                                                RV3028_BACKUP_TCR_MASK, RV3028_BACKUP_TCE | i);
-                       if (ret)
-                               return ret;
-               } else {
-                       dev_warn(&client->dev, "invalid trickle resistor value\n");
-               }
-       }
+       ret = rv3028_set_trickle_charger(rv3028, client);
+       if (ret)
+               return ret;
 
        ret = rtc_add_group(rv3028->rtc, &rv3028_attr_group);
        if (ret)
index 6b8eb20..35b2e36 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 
index 98679ca..1a3ec1b 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/rtc.h>
 
 #define RV8803_I2C_TRY_COUNT           4
@@ -645,8 +645,7 @@ static int rv8803_probe(struct i2c_client *client)
        mutex_init(&rv8803->flags_lock);
        rv8803->client = client;
        if (client->dev.of_node) {
-               rv8803->type = (enum rv8803_type)
-                       of_device_get_match_data(&client->dev);
+               rv8803->type = (uintptr_t)of_device_get_match_data(&client->dev);
        } else {
                const struct i2c_device_id *id = i2c_match_id(rv8803_id, client);
 
index 8702db6..834274d 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/spi/spi.h>
 #include <linux/i2c.h>
 
index 82881fd..48efd61 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/i2c.h>
 #include <linux/bcd.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
 #include <linux/log2.h>
index dca736c..56ebbd4 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/rtc.h>
@@ -227,7 +227,7 @@ static int rzn1_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
                return ret;
 
        /* We cannot set alarms more than one week ahead */
-       farest = rtc_tm_to_time64(&tm_now) + (7 * 86400);
+       farest = rtc_tm_to_time64(&tm_now) + rtc->rtcdev->alarm_offset_max;
        alarm = rtc_tm_to_time64(tm);
        if (time_after(alarm, farest))
                return -ERANGE;
@@ -351,6 +351,7 @@ static int rzn1_rtc_probe(struct platform_device *pdev)
 
        rtc->rtcdev->range_min = RTC_TIMESTAMP_BEGIN_2000;
        rtc->rtcdev->range_max = RTC_TIMESTAMP_END_2099;
+       rtc->rtcdev->alarm_offset_max = 7 * 86400;
        rtc->rtcdev->ops = &rzn1_rtc_ops;
        set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtcdev->features);
        clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtcdev->features);
index 70e1a18..2822388 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
index 3d36e11..76753c7 100644 (file)
@@ -6,11 +6,13 @@
 
 #include <linux/bcd.h>
 #include <linux/clk.h>
+#include <linux/errno.h>
 #include <linux/iopoll.h>
 #include <linux/ioport.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_wakeirq.h>
 #include <linux/regmap.h>
 #include <linux/rtc.h>
@@ -89,6 +91,9 @@
 /* Max STM32 RTC register offset is 0x3FC */
 #define UNDEF_REG                      0xFFFF
 
+/* STM32 RTC driver time helpers */
+#define SEC_PER_DAY            (24 * 60 * 60)
+
 struct stm32_rtc;
 
 struct stm32_rtc_registers {
@@ -114,6 +119,7 @@ struct stm32_rtc_data {
        void (*clear_events)(struct stm32_rtc *rtc, unsigned int flags);
        bool has_pclk;
        bool need_dbp;
+       bool need_accuracy;
 };
 
 struct stm32_rtc {
@@ -158,10 +164,9 @@ static int stm32_rtc_enter_init_mode(struct stm32_rtc *rtc)
                 * slowest rtc_ck frequency may be 32kHz and highest should be
                 * 1MHz, we poll every 10 us with a timeout of 100ms.
                 */
-               return readl_relaxed_poll_timeout_atomic(
-                                       rtc->base + regs->isr,
-                                       isr, (isr & STM32_RTC_ISR_INITF),
-                                       10, 100000);
+               return readl_relaxed_poll_timeout_atomic(rtc->base + regs->isr, isr,
+                                                        (isr & STM32_RTC_ISR_INITF),
+                                                        10, 100000);
        }
 
        return 0;
@@ -425,40 +430,42 @@ static int stm32_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
        return 0;
 }
 
-static int stm32_rtc_valid_alrm(struct stm32_rtc *rtc, struct rtc_time *tm)
+static int stm32_rtc_valid_alrm(struct device *dev, struct rtc_time *tm)
 {
-       const struct stm32_rtc_registers *regs = &rtc->data->regs;
-       int cur_day, cur_mon, cur_year, cur_hour, cur_min, cur_sec;
-       unsigned int dr = readl_relaxed(rtc->base + regs->dr);
-       unsigned int tr = readl_relaxed(rtc->base + regs->tr);
-
-       cur_day = (dr & STM32_RTC_DR_DATE) >> STM32_RTC_DR_DATE_SHIFT;
-       cur_mon = (dr & STM32_RTC_DR_MONTH) >> STM32_RTC_DR_MONTH_SHIFT;
-       cur_year = (dr & STM32_RTC_DR_YEAR) >> STM32_RTC_DR_YEAR_SHIFT;
-       cur_sec = (tr & STM32_RTC_TR_SEC) >> STM32_RTC_TR_SEC_SHIFT;
-       cur_min = (tr & STM32_RTC_TR_MIN) >> STM32_RTC_TR_MIN_SHIFT;
-       cur_hour = (tr & STM32_RTC_TR_HOUR) >> STM32_RTC_TR_HOUR_SHIFT;
+       static struct rtc_time now;
+       time64_t max_alarm_time64;
+       int max_day_forward;
+       int next_month;
+       int next_year;
 
        /*
         * Assuming current date is M-D-Y H:M:S.
         * RTC alarm can't be set on a specific month and year.
         * So the valid alarm range is:
         *      M-D-Y H:M:S < alarm <= (M+1)-D-Y H:M:S
-        * with a specific case for December...
         */
-       if ((((tm->tm_year > cur_year) &&
-             (tm->tm_mon == 0x1) && (cur_mon == 0x12)) ||
-            ((tm->tm_year == cur_year) &&
-             (tm->tm_mon <= cur_mon + 1))) &&
-           ((tm->tm_mday > cur_day) ||
-            ((tm->tm_mday == cur_day) &&
-            ((tm->tm_hour > cur_hour) ||
-             ((tm->tm_hour == cur_hour) && (tm->tm_min > cur_min)) ||
-             ((tm->tm_hour == cur_hour) && (tm->tm_min == cur_min) &&
-              (tm->tm_sec >= cur_sec))))))
-               return 0;
+       stm32_rtc_read_time(dev, &now);
+
+       /*
+        * Find the next month and the year of the next month.
+        * Note: tm_mon and next_month are from 0 to 11
+        */
+       next_month = now.tm_mon + 1;
+       if (next_month == 12) {
+               next_month = 0;
+               next_year = now.tm_year + 1;
+       } else {
+               next_year = now.tm_year;
+       }
 
-       return -EINVAL;
+       /* Find the maximum limit of alarm in days. */
+       max_day_forward = rtc_month_days(now.tm_mon, now.tm_year)
+                        - now.tm_mday
+                        + min(rtc_month_days(next_month, next_year), now.tm_mday);
+
+       /* Convert to timestamp and compare the alarm time and its upper limit */
+       max_alarm_time64 = rtc_tm_to_time64(&now) + max_day_forward * SEC_PER_DAY;
+       return rtc_tm_to_time64(tm) <= max_alarm_time64 ? 0 : -EINVAL;
 }
 
 static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -469,17 +476,17 @@ static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        unsigned int cr, isr, alrmar;
        int ret = 0;
 
-       tm2bcd(tm);
-
        /*
         * RTC alarm can't be set on a specific date, unless this date is
         * up to the same day of month next month.
         */
-       if (stm32_rtc_valid_alrm(rtc, tm) < 0) {
+       if (stm32_rtc_valid_alrm(dev, tm) < 0) {
                dev_err(dev, "Alarm can be set only on upcoming month.\n");
                return -EINVAL;
        }
 
+       tm2bcd(tm);
+
        alrmar = 0;
        /* tm_year and tm_mon are not used because not supported by RTC */
        alrmar |= (tm->tm_mday << STM32_RTC_ALRMXR_DATE_SHIFT) &
@@ -545,6 +552,7 @@ static void stm32_rtc_clear_events(struct stm32_rtc *rtc,
 static const struct stm32_rtc_data stm32_rtc_data = {
        .has_pclk = false,
        .need_dbp = true,
+       .need_accuracy = false,
        .regs = {
                .tr = 0x00,
                .dr = 0x04,
@@ -566,6 +574,7 @@ static const struct stm32_rtc_data stm32_rtc_data = {
 static const struct stm32_rtc_data stm32h7_rtc_data = {
        .has_pclk = true,
        .need_dbp = true,
+       .need_accuracy = false,
        .regs = {
                .tr = 0x00,
                .dr = 0x04,
@@ -596,6 +605,7 @@ static void stm32mp1_rtc_clear_events(struct stm32_rtc *rtc,
 static const struct stm32_rtc_data stm32mp1_data = {
        .has_pclk = true,
        .need_dbp = false,
+       .need_accuracy = true,
        .regs = {
                .tr = 0x00,
                .dr = 0x04,
@@ -628,7 +638,7 @@ static int stm32_rtc_init(struct platform_device *pdev,
        const struct stm32_rtc_registers *regs = &rtc->data->regs;
        unsigned int prer, pred_a, pred_s, pred_a_max, pred_s_max, cr;
        unsigned int rate;
-       int ret = 0;
+       int ret;
 
        rate = clk_get_rate(rtc->rtc_ck);
 
@@ -636,18 +646,32 @@ static int stm32_rtc_init(struct platform_device *pdev,
        pred_a_max = STM32_RTC_PRER_PRED_A >> STM32_RTC_PRER_PRED_A_SHIFT;
        pred_s_max = STM32_RTC_PRER_PRED_S >> STM32_RTC_PRER_PRED_S_SHIFT;
 
-       for (pred_a = pred_a_max; pred_a + 1 > 0; pred_a--) {
-               pred_s = (rate / (pred_a + 1)) - 1;
+       if (rate > (pred_a_max + 1) * (pred_s_max + 1)) {
+               dev_err(&pdev->dev, "rtc_ck rate is too high: %dHz\n", rate);
+               return -EINVAL;
+       }
+
+       if (rtc->data->need_accuracy) {
+               for (pred_a = 0; pred_a <= pred_a_max; pred_a++) {
+                       pred_s = (rate / (pred_a + 1)) - 1;
+
+                       if (pred_s <= pred_s_max && ((pred_s + 1) * (pred_a + 1)) == rate)
+                               break;
+               }
+       } else {
+               for (pred_a = pred_a_max; pred_a + 1 > 0; pred_a--) {
+                       pred_s = (rate / (pred_a + 1)) - 1;
 
-               if (((pred_s + 1) * (pred_a + 1)) == rate)
-                       break;
+                       if (((pred_s + 1) * (pred_a + 1)) == rate)
+                               break;
+               }
        }
 
        /*
         * Can't find a 1Hz, so give priority to RTC power consumption
         * by choosing the higher possible value for prediv_a
         */
-       if ((pred_s > pred_s_max) || (pred_a > pred_a_max)) {
+       if (pred_s > pred_s_max || pred_a > pred_a_max) {
                pred_a = pred_a_max;
                pred_s = (rate / (pred_a + 1)) - 1;
 
@@ -656,6 +680,20 @@ static int stm32_rtc_init(struct platform_device *pdev,
                         "fast" : "slow");
        }
 
+       cr = readl_relaxed(rtc->base + regs->cr);
+
+       prer = readl_relaxed(rtc->base + regs->prer);
+       prer &= STM32_RTC_PRER_PRED_S | STM32_RTC_PRER_PRED_A;
+
+       pred_s = (pred_s << STM32_RTC_PRER_PRED_S_SHIFT) &
+                STM32_RTC_PRER_PRED_S;
+       pred_a = (pred_a << STM32_RTC_PRER_PRED_A_SHIFT) &
+                STM32_RTC_PRER_PRED_A;
+
+       /* quit if there is nothing to initialize */
+       if ((cr & STM32_RTC_CR_FMT) == 0 && prer == (pred_s | pred_a))
+               return 0;
+
        stm32_rtc_wpr_unlock(rtc);
 
        ret = stm32_rtc_enter_init_mode(rtc);
@@ -665,13 +703,10 @@ static int stm32_rtc_init(struct platform_device *pdev,
                goto end;
        }
 
-       prer = (pred_s << STM32_RTC_PRER_PRED_S_SHIFT) & STM32_RTC_PRER_PRED_S;
-       writel_relaxed(prer, rtc->base + regs->prer);
-       prer |= (pred_a << STM32_RTC_PRER_PRED_A_SHIFT) & STM32_RTC_PRER_PRED_A;
-       writel_relaxed(prer, rtc->base + regs->prer);
+       writel_relaxed(pred_s, rtc->base + regs->prer);
+       writel_relaxed(pred_a | pred_s, rtc->base + regs->prer);
 
        /* Force 24h time format */
-       cr = readl_relaxed(rtc->base + regs->cr);
        cr &= ~STM32_RTC_CR_FMT;
        writel_relaxed(cr, rtc->base + regs->cr);
 
@@ -730,16 +765,13 @@ static int stm32_rtc_probe(struct platform_device *pdev)
                rtc->rtc_ck = devm_clk_get(&pdev->dev, NULL);
        } else {
                rtc->pclk = devm_clk_get(&pdev->dev, "pclk");
-               if (IS_ERR(rtc->pclk)) {
-                       dev_err(&pdev->dev, "no pclk clock");
-                       return PTR_ERR(rtc->pclk);
-               }
+               if (IS_ERR(rtc->pclk))
+                       return dev_err_probe(&pdev->dev, PTR_ERR(rtc->pclk), "no pclk clock");
+
                rtc->rtc_ck = devm_clk_get(&pdev->dev, "rtc_ck");
        }
-       if (IS_ERR(rtc->rtc_ck)) {
-               dev_err(&pdev->dev, "no rtc_ck clock");
-               return PTR_ERR(rtc->rtc_ck);
-       }
+       if (IS_ERR(rtc->rtc_ck))
+               return dev_err_probe(&pdev->dev, PTR_ERR(rtc->rtc_ck), "no rtc_ck clock");
 
        if (rtc->data->has_pclk) {
                ret = clk_prepare_enable(rtc->pclk);
@@ -859,7 +891,6 @@ static void stm32_rtc_remove(struct platform_device *pdev)
        device_init_wakeup(&pdev->dev, false);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int stm32_rtc_suspend(struct device *dev)
 {
        struct stm32_rtc *rtc = dev_get_drvdata(dev);
@@ -890,10 +921,10 @@ static int stm32_rtc_resume(struct device *dev)
 
        return ret;
 }
-#endif
 
-static SIMPLE_DEV_PM_OPS(stm32_rtc_pm_ops,
-                        stm32_rtc_suspend, stm32_rtc_resume);
+/*
+ * Power-management operations for the driver. The suspend/resume handlers
+ * are installed through NOIRQ_SYSTEM_SLEEP_PM_OPS, i.e. as the "noirq"
+ * system-sleep callbacks rather than the regular suspend/resume ones.
+ */
+static const struct dev_pm_ops stm32_rtc_pm_ops = {
+       NOIRQ_SYSTEM_SLEEP_PM_OPS(stm32_rtc_suspend, stm32_rtc_resume)
+};
 
 static struct platform_driver stm32_rtc_driver = {
        .probe          = stm32_rtc_probe,
index 6f11b74..7566d0a 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/delay.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
-#include <linux/of_device.h>
 #include <linux/of.h>
 #include <linux/stmp_device.h>
 #include <linux/stmp3xxx_rtc_wdt.h>
index 71548dd..8e0c669 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -847,8 +846,6 @@ static int sun6i_rtc_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       dev_info(&pdev->dev, "RTC enabled\n");
-
        return 0;
 }
 
index f33dc30..20c7e97 100644 (file)
@@ -244,7 +244,7 @@ static int sp_rtc_probe(struct platform_device *plat_dev)
 
        sp_rtc->irq = platform_get_irq(plat_dev, 0);
        if (sp_rtc->irq < 0)
-               return dev_err_probe(&plat_dev->dev, sp_rtc->irq, "platform_get_irq failed\n");
+               return sp_rtc->irq;
 
        ret = devm_request_irq(&plat_dev->dev, sp_rtc->irq, sp_rtc_irq_handler,
                               IRQF_TRIGGER_RISING, "rtc irq", plat_dev);
index 5d019e3..5cab995 100644 (file)
@@ -14,8 +14,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/types.h>
index 0d90fe9..ec759d8 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/delay.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/sys_soc.h>
 #include <linux/property.h>
index 9f14e24..20faf08 100644 (file)
@@ -252,6 +252,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev)
 
        rtc->rtc->ops = &tps6586x_rtc_ops;
        rtc->rtc->range_max = (1ULL << 30) - 1; /* 30-bit seconds */
+       rtc->rtc->alarm_offset_max = ALM1_VALID_RANGE_IN_SEC;
        rtc->rtc->start_secs = mktime64(2009, 1, 1, 0, 0, 0);
        rtc->rtc->set_start_time = true;
 
index 75e4c2d..411ff66 100644 (file)
@@ -406,11 +406,8 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, tps_rtc);
 
        irq  = platform_get_irq(pdev, 0);
-       if (irq <= 0) {
-               dev_warn(&pdev->dev, "Wake up is not possible as irq = %d\n",
-                       irq);
-               return -ENXIO;
-       }
+       if (irq < 0)
+               return irq;
 
        ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
                tps65910_rtc_interrupt, IRQF_TRIGGER_LOW,
index 81b3694..13f8ce0 100644 (file)
@@ -487,11 +487,24 @@ static const struct rtc_class_ops twl_rtc_ops = {
        .alarm_irq_enable = twl_rtc_alarm_irq_enable,
 };
 
+/*
+ * nvmem reg_read callback: forwards the request to twl_i2c_read(), reading
+ * @bytes bytes at register @offset into @val.
+ *
+ * @priv is not a real pointer: it carries the TWL module id that the probe
+ * routine stored in nvmem_cfg.priv, so it is cast back to an integer here.
+ *
+ * Returns the value returned by twl_i2c_read().
+ */
+static int twl_nvram_read(void *priv, unsigned int offset, void *val,
+                         size_t bytes)
+{
+       return twl_i2c_read((long)priv, val, offset, bytes);
+}
+
+/*
+ * nvmem reg_write callback: forwards the request to twl_i2c_write(), writing
+ * @bytes bytes from @val at register @offset.
+ *
+ * @priv is not a real pointer: it carries the TWL module id that the probe
+ * routine stored in nvmem_cfg.priv, so it is cast back to an integer here.
+ *
+ * Returns the value returned by twl_i2c_write().
+ */
+static int twl_nvram_write(void *priv, unsigned int offset, void *val,
+                          size_t bytes)
+{
+       return twl_i2c_write((long)priv, val, offset, bytes);
+}
+
 /*----------------------------------------------------------------------*/
 
 static int twl_rtc_probe(struct platform_device *pdev)
 {
        struct twl_rtc *twl_rtc;
+       struct nvmem_config nvmem_cfg;
        struct device_node *np = pdev->dev.of_node;
        int ret = -EINVAL;
        int irq = platform_get_irq(pdev, 0);
@@ -542,7 +555,6 @@ static int twl_rtc_probe(struct platform_device *pdev)
                        REG_INT_MSK_STS_A);
        }
 
-       dev_info(&pdev->dev, "Enabling TWL-RTC\n");
        ret = twl_rtc_write_u8(twl_rtc, BIT_RTC_CTRL_REG_STOP_RTC_M,
                               REG_RTC_CTRL_REG);
        if (ret < 0)
@@ -564,11 +576,8 @@ static int twl_rtc_probe(struct platform_device *pdev)
 
        twl_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
                                        &twl_rtc_ops, THIS_MODULE);
-       if (IS_ERR(twl_rtc->rtc)) {
-               dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
-                       PTR_ERR(twl_rtc->rtc));
+       if (IS_ERR(twl_rtc->rtc))
                return PTR_ERR(twl_rtc->rtc);
-       }
 
        ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
                                        twl_rtc_interrupt,
@@ -579,6 +588,30 @@ static int twl_rtc_probe(struct platform_device *pdev)
                return ret;
        }
 
+       memset(&nvmem_cfg, 0, sizeof(nvmem_cfg));
+       nvmem_cfg.name = "twl-secured-";
+       nvmem_cfg.type = NVMEM_TYPE_BATTERY_BACKED;
+       nvmem_cfg.reg_read = twl_nvram_read,
+       nvmem_cfg.reg_write = twl_nvram_write,
+       nvmem_cfg.word_size = 1;
+       nvmem_cfg.stride = 1;
+       if (twl_class_is_4030()) {
+               /* 20 bytes SECURED_REG area */
+               nvmem_cfg.size = 20;
+               nvmem_cfg.priv = (void *)TWL_MODULE_SECURED_REG;
+               devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+               /* 8 bytes BACKUP area */
+               nvmem_cfg.name = "twl-backup-";
+               nvmem_cfg.size = 8;
+               nvmem_cfg.priv = (void *)TWL4030_MODULE_BACKUP;
+               devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+       } else {
+               /* 8 bytes SECURED_REG area */
+               nvmem_cfg.size = 8;
+               nvmem_cfg.priv = (void *)TWL_MODULE_SECURED_REG;
+               devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+       }
+
        return 0;
 }
 
index 947f807..3c773cf 100644 (file)
@@ -386,8 +386,6 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
        /* enable the RTC if it's not already enabled */
        power5 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5);
        if (!(power5 &  WM8350_RTC_TICK_ENA)) {
-               dev_info(wm8350->dev, "Starting RTC\n");
-
                wm8350_reg_unlock(wm8350);
 
                ret = wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5,
@@ -426,11 +424,8 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
 
        wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350",
                                        &wm8350_rtc_ops, THIS_MODULE);
-       if (IS_ERR(wm_rtc->rtc)) {
-               ret = PTR_ERR(wm_rtc->rtc);
-               dev_err(&pdev->dev, "failed to register RTC: %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(wm_rtc->rtc))
+               return PTR_ERR(wm_rtc->rtc);
 
        ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC,
                            wm8350_rtc_update_handler, 0,
index 620fab0..c4e3665 100644 (file)
@@ -1378,16 +1378,12 @@ static ssize_t dasd_vendor_show(struct device *dev,
 
 static DEVICE_ATTR(vendor, 0444, dasd_vendor_show, NULL);
 
-#define UID_STRLEN ( /* vendor */ 3 + 1 + /* serial    */ 14 + 1 +\
-                    /* SSID   */ 4 + 1 + /* unit addr */ 2 + 1 +\
-                    /* vduit */ 32 + 1)
-
 static ssize_t
 dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
+       char uid_string[DASD_UID_STRLEN];
        struct dasd_device *device;
        struct dasd_uid uid;
-       char uid_string[UID_STRLEN];
        char ua_string[3];
 
        device = dasd_device_from_cdev(to_ccwdev(dev));
index 8587e42..bd89b03 100644 (file)
@@ -1079,12 +1079,12 @@ static void dasd_eckd_get_uid_string(struct dasd_conf *conf,
 
        create_uid(conf, &uid);
        if (strlen(uid.vduit) > 0)
-               snprintf(print_uid, sizeof(*print_uid),
+               snprintf(print_uid, DASD_UID_STRLEN,
                         "%s.%s.%04x.%02x.%s",
                         uid.vendor, uid.serial, uid.ssid,
                         uid.real_unit_addr, uid.vduit);
        else
-               snprintf(print_uid, sizeof(*print_uid),
+               snprintf(print_uid, DASD_UID_STRLEN,
                         "%s.%s.%04x.%02x",
                         uid.vendor, uid.serial, uid.ssid,
                         uid.real_unit_addr);
@@ -1093,8 +1093,8 @@ static void dasd_eckd_get_uid_string(struct dasd_conf *conf,
 static int dasd_eckd_check_cabling(struct dasd_device *device,
                                   void *conf_data, __u8 lpm)
 {
+       char print_path_uid[DASD_UID_STRLEN], print_device_uid[DASD_UID_STRLEN];
        struct dasd_eckd_private *private = device->private;
-       char print_path_uid[60], print_device_uid[60];
        struct dasd_conf path_conf;
 
        path_conf.data = conf_data;
@@ -1293,9 +1293,9 @@ static void dasd_eckd_path_available_action(struct dasd_device *device,
        __u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE];
        __u8 lpm, opm, npm, ppm, epm, hpfpm, cablepm;
        struct dasd_conf_data *conf_data;
+       char print_uid[DASD_UID_STRLEN];
        struct dasd_conf path_conf;
        unsigned long flags;
-       char print_uid[60];
        int rc, pos;
 
        opm = 0;
@@ -5855,8 +5855,8 @@ static void dasd_eckd_dump_sense(struct dasd_device *device,
 static int dasd_eckd_reload_device(struct dasd_device *device)
 {
        struct dasd_eckd_private *private = device->private;
+       char print_uid[DASD_UID_STRLEN];
        int rc, old_base;
-       char print_uid[60];
        struct dasd_uid uid;
        unsigned long flags;
 
index 0aa5635..8a4dbe9 100644 (file)
@@ -259,6 +259,10 @@ struct dasd_uid {
        char vduit[33];
 };
 
+/*
+ * Size of a buffer that holds a printable UID string. Each term below is the
+ * width of one UID field plus one extra character (the '.' separator used
+ * when the string is formatted, or the terminating NUL after the last field).
+ */
+#define DASD_UID_STRLEN ( /* vendor */ 3 + 1 + /* serial    */ 14 + 1 +        \
+                         /* SSID   */ 4 + 1 + /* unit addr */ 2 + 1 +  \
+                         /* vduit */ 32 + 1)
+
 /*
  * PPRC Status data
  */
index 06bcb6c..4b7ecd4 100644 (file)
@@ -411,13 +411,13 @@ removeseg:
                        segment_unload(entry->segment_name);
        }
        list_del(&dev_info->lh);
+       up_write(&dcssblk_devices_sem);
 
        dax_remove_host(dev_info->gd);
        kill_dax(dev_info->dax_dev);
        put_dax(dev_info->dax_dev);
        del_gendisk(dev_info->gd);
        put_disk(dev_info->gd);
-       up_write(&dcssblk_devices_sem);
 
        if (device_remove_file_self(dev, attr)) {
                device_unregister(dev);
@@ -790,18 +790,17 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
        }
 
        list_del(&dev_info->lh);
+       /* unload all related segments */
+       list_for_each_entry(entry, &dev_info->seg_list, lh)
+               segment_unload(entry->segment_name);
+       up_write(&dcssblk_devices_sem);
+
        dax_remove_host(dev_info->gd);
        kill_dax(dev_info->dax_dev);
        put_dax(dev_info->dax_dev);
        del_gendisk(dev_info->gd);
        put_disk(dev_info->gd);
 
-       /* unload all related segments */
-       list_for_each_entry(entry, &dev_info->seg_list, lh)
-               segment_unload(entry->segment_name);
-
-       up_write(&dcssblk_devices_sem);
-
        device_unregister(&dev_info->dev);
        put_device(&dev_info->dev);
 
index 9fa92e4..7207a7f 100644 (file)
@@ -111,7 +111,7 @@ static inline unsigned long mon_mca_end(struct mon_msg *monmsg)
 
 static inline u8 mon_mca_type(struct mon_msg *monmsg, u8 index)
 {
-       return *((u8 *) mon_mca_start(monmsg) + monmsg->mca_offset + index);
+       return *((u8 *)__va(mon_mca_start(monmsg)) + monmsg->mca_offset + index);
 }
 
 static inline u32 mon_mca_size(struct mon_msg *monmsg)
@@ -121,12 +121,12 @@ static inline u32 mon_mca_size(struct mon_msg *monmsg)
 
 static inline u32 mon_rec_start(struct mon_msg *monmsg)
 {
-       return *((u32 *) (mon_mca_start(monmsg) + monmsg->mca_offset + 4));
+       return *((u32 *)(__va(mon_mca_start(monmsg)) + monmsg->mca_offset + 4));
 }
 
 static inline u32 mon_rec_end(struct mon_msg *monmsg)
 {
-       return *((u32 *) (mon_mca_start(monmsg) + monmsg->mca_offset + 8));
+       return *((u32 *)(__va(mon_mca_start(monmsg)) + monmsg->mca_offset + 8));
 }
 
 static int mon_check_mca(struct mon_msg *monmsg)
@@ -392,8 +392,7 @@ static ssize_t mon_read(struct file *filp, char __user *data,
        mce_start = mon_mca_start(monmsg) + monmsg->mca_offset;
        if ((monmsg->pos >= mce_start) && (monmsg->pos < mce_start + 12)) {
                count = min(count, (size_t) mce_start + 12 - monmsg->pos);
-               ret = copy_to_user(data, (void *) (unsigned long) monmsg->pos,
-                                  count);
+               ret = copy_to_user(data, __va(monmsg->pos), count);
                if (ret)
                        return -EFAULT;
                monmsg->pos += count;
@@ -406,8 +405,7 @@ static ssize_t mon_read(struct file *filp, char __user *data,
        if (monmsg->pos <= mon_rec_end(monmsg)) {
                count = min(count, (size_t) mon_rec_end(monmsg) - monmsg->pos
                                            + 1);
-               ret = copy_to_user(data, (void *) (unsigned long) monmsg->pos,
-                                  count);
+               ret = copy_to_user(data, __va(monmsg->pos), count);
                if (ret)
                        return -EFAULT;
                monmsg->pos += count;
index 34967e6..a108f2b 100644 (file)
@@ -49,8 +49,6 @@ int register_adapter_interrupt(struct airq_struct *airq)
                        return -ENOMEM;
                airq->flags |= AIRQ_PTR_ALLOCATED;
        }
-       if (!airq->lsi_mask)
-               airq->lsi_mask = 0xff;
        snprintf(dbf_txt, sizeof(dbf_txt), "rairq:%p", airq);
        CIO_TRACE_EVENT(4, dbf_txt);
        isc_register(airq->isc);
@@ -98,7 +96,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy)
        head = &airq_lists[tpi_info->isc];
        rcu_read_lock();
        hlist_for_each_entry_rcu(airq, head, list)
-               if ((*airq->lsi_ptr & airq->lsi_mask) != 0)
+               if (*airq->lsi_ptr != 0)
                        airq->handler(airq, tpi_info);
        rcu_read_unlock();
 
index 4b23c9f..ce04caa 100644 (file)
@@ -366,7 +366,6 @@ static int zcdn_create(const char *name)
 {
        dev_t devt;
        int i, rc = 0;
-       char nodename[ZCDN_MAX_NAME];
        struct zcdn_device *zcdndev;
 
        if (mutex_lock_interruptible(&ap_perms_mutex))
@@ -407,13 +406,11 @@ static int zcdn_create(const char *name)
        zcdndev->device.devt = devt;
        zcdndev->device.groups = zcdn_dev_attr_groups;
        if (name[0])
-               strncpy(nodename, name, sizeof(nodename));
+               rc = dev_set_name(&zcdndev->device, "%s", name);
        else
-               snprintf(nodename, sizeof(nodename),
-                        ZCRYPT_NAME "_%d", (int)MINOR(devt));
-       nodename[sizeof(nodename) - 1] = '\0';
-       if (dev_set_name(&zcdndev->device, nodename)) {
-               rc = -EINVAL;
+               rc = dev_set_name(&zcdndev->device, ZCRYPT_NAME "_%d", (int)MINOR(devt));
+       if (rc) {
+               kfree(zcdndev);
                goto unlockout;
        }
        rc = device_register(&zcdndev->device);
index 0292276..ac67576 100644 (file)
@@ -250,7 +250,6 @@ static struct airq_info *new_airq_info(int index)
        info->airq.handler = virtio_airq_handler;
        info->summary_indicator_idx = index;
        info->airq.lsi_ptr = get_summary_indicator(info);
-       info->airq.lsi_mask = 0xff;
        info->airq.isc = VIRTIO_AIRQ_ISC;
        rc = register_adapter_interrupt(&info->airq);
        if (rc) {
index 3f062e4..013a9a3 100644 (file)
@@ -1451,7 +1451,7 @@ retry_next:
 #endif
                                break;
                        }
-                       scsi_rescan_device(&device->sdev_gendev);
+                       scsi_rescan_device(device);
                        break;
 
                default:
index e51e92f..93c6893 100644 (file)
@@ -27,7 +27,7 @@
 
 #define DRV_NAME               "fnic"
 #define DRV_DESCRIPTION                "Cisco FCoE HBA Driver"
-#define DRV_VERSION            "1.6.0.55"
+#define DRV_VERSION            "1.6.0.56"
 #define PFX                    DRV_NAME ": "
 #define DFX                     DRV_NAME "%d: "
 
@@ -236,6 +236,7 @@ struct fnic {
        unsigned int wq_count;
        unsigned int cq_count;
 
+       struct mutex sgreset_mutex;
        struct dentry *fnic_stats_debugfs_host;
        struct dentry *fnic_stats_debugfs_file;
        struct dentry *fnic_reset_debugfs_file;
index be89ce9..9761b2c 100644 (file)
@@ -2168,39 +2168,6 @@ clean_pending_aborts_end:
 }
 
 /*
- * fnic_scsi_host_start_tag
- * Allocates tagid from host's tag list
- **/
-static inline int
-fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc)
-{
-       struct request *rq = scsi_cmd_to_rq(sc);
-       struct request_queue *q = rq->q;
-       struct request *dummy;
-
-       dummy = blk_mq_alloc_request(q, REQ_OP_WRITE, BLK_MQ_REQ_NOWAIT);
-       if (IS_ERR(dummy))
-               return SCSI_NO_TAG;
-
-       rq->tag = dummy->tag;
-       sc->host_scribble = (unsigned char *)dummy;
-
-       return dummy->tag;
-}
-
-/*
- * fnic_scsi_host_end_tag
- * frees tag allocated by fnic_scsi_host_start_tag.
- **/
-static inline void
-fnic_scsi_host_end_tag(struct fnic *fnic, struct scsi_cmnd *sc)
-{
-       struct request *dummy = (struct request *)sc->host_scribble;
-
-       blk_mq_free_request(dummy);
-}
-
-/*
  * SCSI Eh thread issues a Lun Reset when one or more commands on a LUN
  * fail to get aborted. It calls driver's eh_device_reset with a SCSI command
  * on the LUN.
@@ -2222,7 +2189,6 @@ int fnic_device_reset(struct scsi_cmnd *sc)
        struct reset_stats *reset_stats;
        int tag = rq->tag;
        DECLARE_COMPLETION_ONSTACK(tm_done);
-       int tag_gen_flag = 0;   /*to track tags allocated by fnic driver*/
        bool new_sc = 0;
 
        /* Wait for rport to unblock */
@@ -2252,17 +2218,17 @@ int fnic_device_reset(struct scsi_cmnd *sc)
        }
 
        fnic_priv(sc)->flags = FNIC_DEVICE_RESET;
-       /* Allocate tag if not present */
 
        if (unlikely(tag < 0)) {
                /*
-                * Really should fix the midlayer to pass in a proper
-                * request for ioctls...
+                * For device reset issued through sg3utils, we let
+                * only one LUN_RESET to go through and use a special
+                * tag equal to max_tag_id so that we don't have to allocate
+                * or free it. It won't interact with tags
+                * allocated by mid layer.
                 */
-               tag = fnic_scsi_host_start_tag(fnic, sc);
-               if (unlikely(tag == SCSI_NO_TAG))
-                       goto fnic_device_reset_end;
-               tag_gen_flag = 1;
+               mutex_lock(&fnic->sgreset_mutex);
+               tag = fnic->fnic_max_tag_id;
                new_sc = 1;
        }
        io_lock = fnic_io_lock_hash(fnic, sc);
@@ -2434,9 +2400,8 @@ fnic_device_reset_end:
                  (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
                  fnic_flags_and_state(sc));
 
-       /* free tag if it is allocated */
-       if (unlikely(tag_gen_flag))
-               fnic_scsi_host_end_tag(fnic, sc);
+       if (new_sc)
+               mutex_unlock(&fnic->sgreset_mutex);
 
        FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
                      "Returning from device reset %s\n",
index bd5f39d..9472b97 100644 (file)
@@ -787,7 +787,7 @@ static int hisi_sas_init_device(struct domain_device *device)
                 * However we don't need to issue a hard reset here for these
                 * reasons:
                 * a. When probing the device, libsas/libata already issues a
-                * hard reset in sas_probe_sata() -> ata_sas_async_probe().
+                * hard reset in sas_probe_sata() -> ata_port_probe().
                 * Note that in hisi_sas_debug_I_T_nexus_reset() we take care
                 * to issue a hard reset by checking the dev status (== INIT).
                 * b. When resetting the controller, this is simply unnecessary.
index 3bf4547..12e2653 100644 (file)
@@ -567,8 +567,6 @@ static struct ata_port_operations sas_sata_ops = {
        .qc_prep                = ata_noop_qc_prep,
        .qc_issue               = sas_ata_qc_issue,
        .qc_fill_rtf            = sas_ata_qc_fill_rtf,
-       .port_start             = ata_sas_port_start,
-       .port_stop              = ata_sas_port_stop,
        .set_dmamode            = sas_ata_set_dmamode,
        .sched_eh               = sas_ata_sched_eh,
        .end_eh                 = sas_ata_end_eh,
@@ -609,9 +607,6 @@ int sas_ata_init(struct domain_device *found_dev)
        ap->private_data = found_dev;
        ap->cbl = ATA_CBL_SATA;
        ap->scsi_host = shost;
-       rc = ata_sas_port_init(ap);
-       if (rc)
-               goto destroy_port;
 
        rc = ata_sas_tport_add(ata_host->dev, ap);
        if (rc)
@@ -623,7 +618,7 @@ int sas_ata_init(struct domain_device *found_dev)
        return 0;
 
 destroy_port:
-       ata_sas_port_destroy(ap);
+       kfree(ap);
 free_host:
        ata_host_put(ata_host);
        return rc;
@@ -657,7 +652,7 @@ void sas_probe_sata(struct asd_sas_port *port)
                if (!dev_is_sata(dev))
                        continue;
 
-               ata_sas_async_probe(dev->sata_dev.ap);
+               ata_port_probe(dev->sata_dev.ap);
        }
        mutex_unlock(&port->ha->disco_mutex);
 
index 15cb996..ff7b63b 100644 (file)
@@ -301,7 +301,7 @@ void sas_free_device(struct kref *kref)
 
        if (dev_is_sata(dev) && dev->sata_dev.ap) {
                ata_sas_tport_delete(dev->sata_dev.ap);
-               ata_sas_port_destroy(dev->sata_dev.ap);
+               kfree(dev->sata_dev.ap);
                ata_host_put(dev->sata_dev.ata_host);
                dev->sata_dev.ata_host = NULL;
                dev->sata_dev.ap = NULL;
index 0c103f4..9047cfc 100644 (file)
@@ -386,37 +386,7 @@ struct sas_phy *sas_get_local_phy(struct domain_device *dev)
 }
 EXPORT_SYMBOL_GPL(sas_get_local_phy);
 
-static void sas_wait_eh(struct domain_device *dev)
-{
-       struct sas_ha_struct *ha = dev->port->ha;
-       DEFINE_WAIT(wait);
-
-       if (dev_is_sata(dev)) {
-               ata_port_wait_eh(dev->sata_dev.ap);
-               return;
-       }
- retry:
-       spin_lock_irq(&ha->lock);
-
-       while (test_bit(SAS_DEV_EH_PENDING, &dev->state)) {
-               prepare_to_wait(&ha->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
-               spin_unlock_irq(&ha->lock);
-               schedule();
-               spin_lock_irq(&ha->lock);
-       }
-       finish_wait(&ha->eh_wait_q, &wait);
-
-       spin_unlock_irq(&ha->lock);
-
-       /* make sure SCSI EH is complete */
-       if (scsi_host_in_recovery(ha->shost)) {
-               msleep(10);
-               goto retry;
-       }
-}
-
-static int sas_queue_reset(struct domain_device *dev, int reset_type,
-                          u64 lun, int wait)
+static int sas_queue_reset(struct domain_device *dev, int reset_type, u64 lun)
 {
        struct sas_ha_struct *ha = dev->port->ha;
        int scheduled = 0, tries = 100;
@@ -424,8 +394,6 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type,
        /* ata: promote lun reset to bus reset */
        if (dev_is_sata(dev)) {
                sas_ata_schedule_reset(dev);
-               if (wait)
-                       sas_ata_wait_eh(dev);
                return SUCCESS;
        }
 
@@ -443,9 +411,6 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type,
                }
                spin_unlock_irq(&ha->lock);
 
-               if (wait)
-                       sas_wait_eh(dev);
-
                if (scheduled)
                        return SUCCESS;
        }
@@ -498,7 +463,7 @@ int sas_eh_device_reset_handler(struct scsi_cmnd *cmd)
        struct sas_internal *i = to_sas_internal(host->transportt);
 
        if (current != host->ehandler)
-               return sas_queue_reset(dev, SAS_DEV_LU_RESET, cmd->device->lun, 0);
+               return sas_queue_reset(dev, SAS_DEV_LU_RESET, cmd->device->lun);
 
        int_to_scsilun(cmd->device->lun, &lun);
 
@@ -521,7 +486,7 @@ int sas_eh_target_reset_handler(struct scsi_cmnd *cmd)
        struct sas_internal *i = to_sas_internal(host->transportt);
 
        if (current != host->ehandler)
-               return sas_queue_reset(dev, SAS_DEV_RESET, 0, 0);
+               return sas_queue_reset(dev, SAS_DEV_RESET, 0);
 
        if (!i->dft->lldd_I_T_nexus_reset)
                return FAILED;
index ed3923f..6de35b3 100644 (file)
 *
 *****************************************************************************/
 
-typedef volatile struct _MPI2_SYSTEM_INTERFACE_REGS {
+typedef struct _MPI2_SYSTEM_INTERFACE_REGS {
        U32 Doorbell;           /*0x00 */
        U32 WriteSequence;      /*0x04 */
        U32 HostDiagnostic;     /*0x08 */
index 53f5492..61a32bf 100644 (file)
@@ -138,6 +138,9 @@ _base_get_ioc_facts(struct MPT3SAS_ADAPTER *ioc);
 static void
 _base_clear_outstanding_commands(struct MPT3SAS_ADAPTER *ioc);
 
+static u32
+_base_readl_ext_retry(const void __iomem *addr);
+
 /**
  * mpt3sas_base_check_cmd_timeout - Function
  *             to check timeout and command termination due
@@ -201,7 +204,7 @@ module_param_call(mpt3sas_fwfault_debug, _scsih_set_fwfault_debug,
  * while reading the system interface register.
  */
 static inline u32
-_base_readl_aero(const volatile void __iomem *addr)
+_base_readl_aero(const void __iomem *addr)
 {
        u32 i = 0, ret_val;
 
@@ -213,8 +216,22 @@ _base_readl_aero(const volatile void __iomem *addr)
        return ret_val;
 }
 
+/**
+ * _base_readl_ext_retry - read a register, retrying while it reads as zero
+ * @addr: MMIO address of the register to read
+ *
+ * Reads @addr up to 30 times and stops at the first non-zero value, so a
+ * good read is never overwritten by a later one.
+ *
+ * Return: the first non-zero value read, or 0 if all 30 reads returned zero.
+ */
+static u32
+_base_readl_ext_retry(const void __iomem *addr)
+{
+       u32 i, ret_val;
+
+       for (i = 0 ; i < 30 ; i++) {
+               ret_val = readl(addr);
+               /* A non-zero value is taken as a valid read: stop retrying. */
+               if (ret_val != 0)
+                       break;
+       }
+
+       return ret_val;
+}
+
 static inline u32
-_base_readl(const volatile void __iomem *addr)
+_base_readl(const void __iomem *addr)
 {
        return readl(addr);
 }
@@ -940,7 +957,7 @@ mpt3sas_halt_firmware(struct MPT3SAS_ADAPTER *ioc)
 
        dump_stack();
 
-       doorbell = ioc->base_readl(&ioc->chip->Doorbell);
+       doorbell = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
        if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
                mpt3sas_print_fault_code(ioc, doorbell &
                    MPI2_DOORBELL_DATA_MASK);
@@ -6686,7 +6703,7 @@ mpt3sas_base_get_iocstate(struct MPT3SAS_ADAPTER *ioc, int cooked)
 {
        u32 s, sc;
 
-       s = ioc->base_readl(&ioc->chip->Doorbell);
+       s = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
        sc = s & MPI2_IOC_STATE_MASK;
        return cooked ? sc : s;
 }
@@ -6831,7 +6848,7 @@ _base_wait_for_doorbell_ack(struct MPT3SAS_ADAPTER *ioc, int timeout)
                                           __func__, count, timeout));
                        return 0;
                } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
-                       doorbell = ioc->base_readl(&ioc->chip->Doorbell);
+                       doorbell = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
                        if ((doorbell & MPI2_IOC_STATE_MASK) ==
                            MPI2_IOC_STATE_FAULT) {
                                mpt3sas_print_fault_code(ioc, doorbell);
@@ -6871,7 +6888,7 @@ _base_wait_for_doorbell_not_used(struct MPT3SAS_ADAPTER *ioc, int timeout)
        count = 0;
        cntdn = 1000 * timeout;
        do {
-               doorbell_reg = ioc->base_readl(&ioc->chip->Doorbell);
+               doorbell_reg = ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
                if (!(doorbell_reg & MPI2_DOORBELL_USED)) {
                        dhsprintk(ioc,
                                  ioc_info(ioc, "%s: successful count(%d), timeout(%d)\n",
@@ -7019,7 +7036,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
        __le32 *mfp;
 
        /* make sure doorbell is not in use */
-       if ((ioc->base_readl(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) {
+       if ((ioc->base_readl_ext_retry(&ioc->chip->Doorbell) & MPI2_DOORBELL_USED)) {
                ioc_err(ioc, "doorbell is in use (line=%d)\n", __LINE__);
                return -EFAULT;
        }
@@ -7068,7 +7085,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
        }
 
        /* read the first two 16-bits, it gives the total length of the reply */
-       reply[0] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell)
+       reply[0] = le16_to_cpu(ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
            & MPI2_DOORBELL_DATA_MASK);
        writel(0, &ioc->chip->HostInterruptStatus);
        if ((_base_wait_for_doorbell_int(ioc, 5))) {
@@ -7076,7 +7093,7 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
                        __LINE__);
                return -EFAULT;
        }
-       reply[1] = le16_to_cpu(ioc->base_readl(&ioc->chip->Doorbell)
+       reply[1] = le16_to_cpu(ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
            & MPI2_DOORBELL_DATA_MASK);
        writel(0, &ioc->chip->HostInterruptStatus);
 
@@ -7087,10 +7104,10 @@ _base_handshake_req_reply_wait(struct MPT3SAS_ADAPTER *ioc, int request_bytes,
                        return -EFAULT;
                }
                if (i >=  reply_bytes/2) /* overflow case */
-                       ioc->base_readl(&ioc->chip->Doorbell);
+                       ioc->base_readl_ext_retry(&ioc->chip->Doorbell);
                else
                        reply[i] = le16_to_cpu(
-                           ioc->base_readl(&ioc->chip->Doorbell)
+                           ioc->base_readl_ext_retry(&ioc->chip->Doorbell)
                            & MPI2_DOORBELL_DATA_MASK);
                writel(0, &ioc->chip->HostInterruptStatus);
        }
@@ -7949,7 +7966,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
                        goto out;
                }
 
-               host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
+               host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
                drsprintk(ioc,
                          ioc_info(ioc, "wrote magic sequence: count(%d), host_diagnostic(0x%08x)\n",
                                   count, host_diagnostic));
@@ -7969,7 +7986,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
        for (count = 0; count < (300000000 /
                MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC); count++) {
 
-               host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
+               host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
 
                if (host_diagnostic == 0xFFFFFFFF) {
                        ioc_info(ioc,
@@ -8359,10 +8376,13 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
        ioc->rdpq_array_enable_assigned = 0;
        ioc->use_32bit_dma = false;
        ioc->dma_mask = 64;
-       if (ioc->is_aero_ioc)
+       if (ioc->is_aero_ioc) {
                ioc->base_readl = &_base_readl_aero;
-       else
+               ioc->base_readl_ext_retry = &_base_readl_ext_retry;
+       } else {
                ioc->base_readl = &_base_readl;
+               ioc->base_readl_ext_retry = &_base_readl;
+       }
        r = mpt3sas_base_map_resources(ioc);
        if (r)
                goto out_free_resources;
index 05364aa..1be0850 100644 (file)
@@ -994,7 +994,7 @@ typedef void (*NVME_BUILD_PRP)(struct MPT3SAS_ADAPTER *ioc, u16 smid,
 typedef void (*PUT_SMID_IO_FP_HIP) (struct MPT3SAS_ADAPTER *ioc, u16 smid,
        u16 funcdep);
 typedef void (*PUT_SMID_DEFAULT) (struct MPT3SAS_ADAPTER *ioc, u16 smid);
-typedef u32 (*BASE_READ_REG) (const volatile void __iomem *addr);
+typedef u32 (*BASE_READ_REG) (const void __iomem *addr);
 /*
  * To get high iops reply queue's msix index when high iops mode is enabled
  * else get the msix index of general reply queues.
@@ -1618,6 +1618,7 @@ struct MPT3SAS_ADAPTER {
        u8              diag_trigger_active;
        u8              atomic_desc_capable;
        BASE_READ_REG   base_readl;
+       BASE_READ_REG   base_readl_ext_retry;
        struct SL_WH_MASTER_TRIGGER_T diag_trigger_master;
        struct SL_WH_EVENT_TRIGGERS_T diag_trigger_event;
        struct SL_WH_SCSI_TRIGGERS_T diag_trigger_scsi;
index 97f9d2f..d9d366e 100644 (file)
@@ -1500,7 +1500,7 @@ static void mvumi_rescan_devices(struct mvumi_hba *mhba, int id)
 
        sdev = scsi_device_lookup(mhba->shost, 0, id, 0);
        if (sdev) {
-               scsi_rescan_device(&sdev->sdev_gendev);
+               scsi_rescan_device(sdev);
                scsi_device_put(sdev);
        }
 }
index c5c0bbd..1619cc3 100644 (file)
@@ -548,7 +548,6 @@ extern void qedf_get_generic_tlv_data(void *dev, struct qed_generic_tlvs *data);
 extern void qedf_wq_grcdump(struct work_struct *work);
 void qedf_stag_change_work(struct work_struct *work);
 void qedf_ctx_soft_reset(struct fc_lport *lport);
-extern void qedf_board_disable_work(struct work_struct *work);
 extern void qedf_schedule_hw_err_handler(void *dev,
                enum qed_hw_err_type err_type);
 
index 0e316cc..7722184 100644 (file)
@@ -67,8 +67,6 @@ void qedi_trace_io(struct qedi_ctx *qedi, struct iscsi_task *task,
 int qedi_alloc_id(struct qedi_portid_tbl *id_tbl, u16 id);
 u16 qedi_alloc_new_id(struct qedi_portid_tbl *id_tbl);
 void qedi_free_id(struct qedi_portid_tbl *id_tbl, u16 id);
-int qedi_create_sysfs_ctx_attr(struct qedi_ctx *qedi);
-void qedi_remove_sysfs_ctx_attr(struct qedi_ctx *qedi);
 void qedi_clearsq(struct qedi_ctx *qedi,
                  struct qedi_conn *qedi_conn,
                  struct iscsi_task *task);
index b002224..44449c7 100644 (file)
@@ -3093,8 +3093,6 @@ qla24xx_vport_create(struct fc_vport *fc_vport, bool disable)
                        vha->flags.difdix_supported = 1;
                        ql_dbg(ql_dbg_user, vha, 0x7082,
                            "Registered for DIF/DIX type 1 and 3 protection.\n");
-                       if (ql2xenabledif == 1)
-                               prot = SHOST_DIX_TYPE0_PROTECTION;
                        scsi_host_set_prot(vha->host,
                            prot | SHOST_DIF_TYPE1_PROTECTION
                            | SHOST_DIF_TYPE2_PROTECTION
index d7e8454..691ef82 100644 (file)
  * ----------------------------------------------------------------------
  * | Module Init and Probe        |       0x0199       |                |
  * | Mailbox commands             |       0x1206       | 0x11a5-0x11ff |
- * | Device Discovery             |       0x2134       | 0x210e-0x2115  |
- * |                              |                    | 0x211c-0x2128  |
- * |                              |                    | 0x212c-0x2134  |
+ * | Device Discovery             |       0x2134       | 0x2112-0x2115  |
+ * |                              |                    | 0x2127-0x2128  |
  * | Queue Command and IO tracing |       0x3074       | 0x300b         |
  * |                              |                    | 0x3027-0x3028  |
  * |                              |                    | 0x303d-0x3041  |
- * |                              |                    | 0x302d,0x3033  |
+ * |                              |                    | 0x302e,0x3033  |
  * |                              |                    | 0x3036,0x3038  |
  * |                              |                    | 0x303a                |
  * | DPC Thread                   |       0x4023       | 0x4002,0x4013  |
index 70482b5..54f0a41 100644 (file)
@@ -368,6 +368,7 @@ ql_log_qp(uint32_t, struct qla_qpair *, int32_t, const char *fmt, ...);
 #define ql_dbg_tgt_tmr 0x00001000 /* Target mode task management */
 #define ql_dbg_tgt_dif  0x00000800 /* Target mode dif */
 #define ql_dbg_edif    0x00000400 /* edif and purex debug */
+#define ql_dbg_unsol   0x00000100 /* Unsolicited path debug */
 
 extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *,
        uint32_t, void **);
index 336b8c6..deb6426 100644 (file)
@@ -346,6 +346,12 @@ struct name_list_extended {
        u8                      sent;
 };
 
+struct qla_nvme_fc_rjt {
+       struct fcnvme_ls_rjt *c;        /* NOTE(review): presumably the LS reject payload buffer - confirm */
+       dma_addr_t  cdma;       /* NOTE(review): presumably the DMA address of @c - confirm */
+       u16 size;       /* NOTE(review): presumably the size of @c in bytes - confirm */
+};
+
 struct els_reject {
        struct fc_els_ls_rjt *c;
        dma_addr_t  cdma;
@@ -503,6 +509,20 @@ struct ct_arg {
        port_id_t       id;
 };
 
+struct qla_nvme_lsrjt_pt_arg {  /* NOTE(review): looks like NVMe LS reject pass-through args - confirm */
+       struct fc_port *fcport;
+       u8 opcode;
+       u8 vp_idx;
+       u8 reason;
+       u8 explanation;
+       __le16 nport_handle;    /* stored little-endian */
+       u16 control_flags;      /* CPU byte order, unlike the __le fields around it */
+       __le16 ox_id;           /* stored little-endian */
+       __le32 xchg_address;    /* stored little-endian */
+       u32 tx_byte_count, rx_byte_count;
+       dma_addr_t tx_addr, rx_addr;    /* DMA addresses; presumably the tx/rx buffers - confirm */
+};
+
 /*
  * SRB extensions.
  */
@@ -611,13 +631,16 @@ struct srb_iocb {
                        void *desc;
 
                        /* These are only used with ls4 requests */
-                       int cmd_len;
-                       int rsp_len;
+                       __le32 cmd_len;
+                       __le32 rsp_len;
                        dma_addr_t cmd_dma;
                        dma_addr_t rsp_dma;
                        enum nvmefc_fcp_datadir dir;
                        uint32_t dl;
                        uint32_t timeout_sec;
+                       __le32 exchange_address;
+                       __le16 nport_handle;
+                       __le16 ox_id;
                        struct  list_head   entry;
                } nvme;
                struct {
@@ -707,6 +730,10 @@ typedef struct srb {
        struct fc_port *fcport;
        struct scsi_qla_host *vha;
        unsigned int start_timer:1;
+       unsigned int abort:1;
+       unsigned int aborted:1;
+       unsigned int completed:1;
+       unsigned int unsol_rsp:1;
 
        uint32_t handle;
        uint16_t flags;
@@ -2542,6 +2569,7 @@ enum rscn_addr_format {
 typedef struct fc_port {
        struct list_head list;
        struct scsi_qla_host *vha;
+       struct list_head unsol_ctx_head;
 
        unsigned int conf_compl_supported:1;
        unsigned int deleted:2;
@@ -3742,6 +3770,16 @@ struct qla_fw_resources {
        u16 pad;
 };
 
+struct qla_fw_res {     /* adapter-wide firmware resource accounting (qla_hw_data.fwres) */
+       u16      iocb_total;    /* set from ha->orig_fw_iocb_count in qla_init_iocb_limit() */
+       u16      iocb_limit;    /* QLA_IOCB_PCT_LIMIT percent of orig_fw_iocb_count */
+       atomic_t iocb_used;     /* IOCBs charged while ql2xenforce_iocb_limit == 2 */
+
+       u16      exch_total;    /* set from ha->orig_fw_xcb_count in qla_init_iocb_limit() */
+       u16      exch_limit;    /* QLA_IOCB_PCT_LIMIT percent of orig_fw_xcb_count */
+       atomic_t exch_used;     /* exchanges charged while ql2xenforce_iocb_limit == 2 */
+};
+
 #define QLA_IOCB_PCT_LIMIT 95
 
 struct  qla_buf_pool {
@@ -3787,6 +3825,12 @@ struct qla_qpair {
 
        uint16_t id;                    /* qp number used with FW */
        uint16_t vp_idx;                /* vport ID */
+
+       uint16_t dsd_inuse;
+       uint16_t dsd_avail;
+       struct list_head dsd_list;
+#define NUM_DSD_CHAIN 4096
+
        mempool_t *srb_mempool;
 
        struct pci_dev  *pdev;
@@ -4384,7 +4428,6 @@ struct qla_hw_data {
        uint8_t         aen_mbx_count;
        atomic_t        num_pend_mbx_stage1;
        atomic_t        num_pend_mbx_stage2;
-       atomic_t        num_pend_mbx_stage3;
        uint16_t        frame_payload_size;
 
        uint32_t        login_retry_count;
@@ -4714,11 +4757,6 @@ struct qla_hw_data {
        struct fw_blob  *hablob;
        struct qla82xx_legacy_intr_set nx_legacy_intr;
 
-       uint16_t        gbl_dsd_inuse;
-       uint16_t        gbl_dsd_avail;
-       struct list_head gbl_dsd_list;
-#define NUM_DSD_CHAIN 4096
-
        uint8_t fw_type;
        uint32_t file_prd_off;  /* File firmware product offset */
 
@@ -4800,6 +4838,8 @@ struct qla_hw_data {
        struct els_reject elsrej;
        u8 edif_post_stop_cnt_down;
        struct qla_vp_map *vp_map;
+       struct qla_nvme_fc_rjt lsrjt;
+       struct qla_fw_res fwres ____cacheline_aligned;
 };
 
 #define RX_ELS_SIZE (roundup(sizeof(struct enode) + ELS_MAX_PAYLOAD, SMP_CACHE_BYTES))
@@ -4832,6 +4872,7 @@ struct active_regions {
  * is variable) starting at "iocb".
  */
 struct purex_item {
+       void *purls_context;
        struct list_head list;
        struct scsi_qla_host *vha;
        void (*process_item)(struct scsi_qla_host *vha,
index 1925cc6..f060e59 100644 (file)
@@ -276,6 +276,16 @@ qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
 
                seq_printf(s, "estimate exchange used[%d] high water limit [%d] n",
                           exch_used, ha->base_qpair->fwres.exch_limit);
+
+               if (ql2xenforce_iocb_limit == 2) {
+                       iocbs_used = atomic_read(&ha->fwres.iocb_used);
+                       exch_used  = atomic_read(&ha->fwres.exch_used);
+                       seq_printf(s, "        estimate iocb2 used [%d] high water limit [%d]\n",
+                                       iocbs_used, ha->fwres.iocb_limit);
+
+                       seq_printf(s, "        estimate exchange2 used[%d] high water limit [%d] \n",
+                                       exch_used, ha->fwres.exch_limit);
+               }
        }
 
        return 0;
index 816c0b9..09cb941 100644 (file)
@@ -603,7 +603,11 @@ qla2xxx_msix_rsp_q_hs(int irq, void *dev_id);
 fc_port_t *qla2x00_find_fcport_by_loopid(scsi_qla_host_t *, uint16_t);
 fc_port_t *qla2x00_find_fcport_by_wwpn(scsi_qla_host_t *, u8 *, u8);
 fc_port_t *qla2x00_find_fcport_by_nportid(scsi_qla_host_t *, port_id_t *, u8);
-void __qla_consume_iocb(struct scsi_qla_host *vha, void **pkt, struct rsp_que **rsp);
+void qla24xx_queue_purex_item(scsi_qla_host_t *, struct purex_item *,
+                             void (*process_item)(struct scsi_qla_host *,
+                             struct purex_item *));
+void __qla_consume_iocb(struct scsi_qla_host *, void **, struct rsp_que **);
+void qla2xxx_process_purls_iocb(void **pkt, struct rsp_que **rsp);
 
 /*
  * Global Function Prototypes in qla_sup.c source file.
@@ -666,9 +670,11 @@ extern int qla2xxx_get_vpd_field(scsi_qla_host_t *, char *, char *, size_t);
 extern void qla2xxx_flash_npiv_conf(scsi_qla_host_t *);
 extern int qla24xx_read_fcp_prio_cfg(scsi_qla_host_t *);
 extern int qla2x00_mailbox_passthru(struct bsg_job *bsg_job);
-int __qla_copy_purex_to_buffer(struct scsi_qla_host *vha, void **pkt,
-       struct rsp_que **rsp, u8 *buf, u32 buf_len);
-
+int qla2x00_sys_ld_info(struct bsg_job *bsg_job);
+int __qla_copy_purex_to_buffer(struct scsi_qla_host *, void **,
+       struct rsp_que **, u8 *, u32);
+struct purex_item *qla27xx_copy_multiple_pkt(struct scsi_qla_host *vha,
+       void **pkt, struct rsp_que **rsp, bool is_purls, bool byte_order);
 int qla_mailbox_passthru(scsi_qla_host_t *vha, uint16_t *mbx_in,
                         uint16_t *mbx_out);
 
index df623de..a314cfc 100644 (file)
@@ -2223,6 +2223,8 @@ __qla2x00_async_tm_cmd(struct tmf_arg *arg)
                        rval = QLA_FUNCTION_FAILED;
                }
        }
+       if (tm_iocb->u.tmf.data)
+               rval = tm_iocb->u.tmf.data;
 
 done_free_sp:
        /* ref: INIT */
@@ -4203,7 +4205,7 @@ void qla_init_iocb_limit(scsi_qla_host_t *vha)
        u8 i;
        struct qla_hw_data *ha = vha->hw;
 
-        __qla_adjust_iocb_limit(ha->base_qpair);
+       __qla_adjust_iocb_limit(ha->base_qpair);
        ha->base_qpair->fwres.iocbs_used = 0;
        ha->base_qpair->fwres.exch_used  = 0;
 
@@ -4214,6 +4216,14 @@ void qla_init_iocb_limit(scsi_qla_host_t *vha)
                        ha->queue_pair_map[i]->fwres.exch_used = 0;
                }
        }
+
+       ha->fwres.iocb_total = ha->orig_fw_iocb_count;
+       ha->fwres.iocb_limit = (ha->orig_fw_iocb_count * QLA_IOCB_PCT_LIMIT) / 100;
+       ha->fwres.exch_total = ha->orig_fw_xcb_count;
+       ha->fwres.exch_limit = (ha->orig_fw_xcb_count * QLA_IOCB_PCT_LIMIT) / 100;
+
+       atomic_set(&ha->fwres.iocb_used, 0);
+       atomic_set(&ha->fwres.exch_used, 0);
 }
 
 void qla_adjust_iocb_limit(scsi_qla_host_t *vha)
@@ -5554,6 +5564,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags)
        INIT_WORK(&fcport->reg_work, qla_register_fcport_fn);
        INIT_LIST_HEAD(&fcport->gnl_entry);
        INIT_LIST_HEAD(&fcport->list);
+       INIT_LIST_HEAD(&fcport->unsol_ctx_head);
 
        INIT_LIST_HEAD(&fcport->sess_cmd_list);
        spin_lock_init(&fcport->sess_cmd_lock);
@@ -5596,7 +5607,7 @@ static void qla_get_login_template(scsi_qla_host_t *vha)
        __be32 *q;
 
        memset(ha->init_cb, 0, ha->init_cb_size);
-       sz = min_t(int, sizeof(struct fc_els_csp), ha->init_cb_size);
+       sz = min_t(int, sizeof(struct fc_els_flogi), ha->init_cb_size);
        rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma,
                                            ha->init_cb, sz);
        if (rval != QLA_SUCCESS) {
@@ -7390,14 +7401,15 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
        }
 
        /* purge MBox commands */
-       if (atomic_read(&ha->num_pend_mbx_stage3)) {
+       spin_lock_irqsave(&ha->hardware_lock, flags);
+       if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags)) {
                clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
                complete(&ha->mbx_intr_comp);
        }
+       spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
        i = 0;
-       while (atomic_read(&ha->num_pend_mbx_stage3) ||
-           atomic_read(&ha->num_pend_mbx_stage2) ||
+       while (atomic_read(&ha->num_pend_mbx_stage2) ||
            atomic_read(&ha->num_pend_mbx_stage1)) {
                msleep(20);
                i++;
@@ -9643,6 +9655,7 @@ struct qla_qpair *qla2xxx_create_qpair(struct scsi_qla_host *vha, int qos,
                qpair->vp_idx = vp_idx;
                qpair->fw_started = ha->flags.fw_started;
                INIT_LIST_HEAD(&qpair->hints_list);
+               INIT_LIST_HEAD(&qpair->dsd_list);
                qpair->chip_reset = ha->base_qpair->chip_reset;
                qpair->enable_class_2 = ha->base_qpair->enable_class_2;
                qpair->enable_explicit_conf =
@@ -9771,6 +9784,19 @@ int qla2xxx_delete_qpair(struct scsi_qla_host *vha, struct qla_qpair *qpair)
        if (ret != QLA_SUCCESS)
                goto fail;
 
+       if (!list_empty(&qpair->dsd_list)) {
+               struct dsd_dma *dsd_ptr, *tdsd_ptr;
+
+               /* clean up allocated prev pool */
+               list_for_each_entry_safe(dsd_ptr, tdsd_ptr,
+                                        &qpair->dsd_list, list) {
+                       dma_pool_free(ha->dl_dma_pool, dsd_ptr->dsd_addr,
+                                     dsd_ptr->dsd_list_dma);
+                       list_del(&dsd_ptr->list);
+                       kfree(dsd_ptr);
+               }
+       }
+
        mutex_lock(&ha->mq_lock);
        ha->queue_pair_map[qpair->id] = NULL;
        clear_bit(qpair->id, ha->qpair_qid_map);
index 0167e85..0556969 100644 (file)
@@ -386,6 +386,7 @@ enum {
        RESOURCE_IOCB = BIT_0,
        RESOURCE_EXCH = BIT_1,  /* exchange */
        RESOURCE_FORCE = BIT_2,
+       RESOURCE_HA = BIT_3,
 };
 
 static inline int
@@ -393,7 +394,7 @@ qla_get_fw_resources(struct qla_qpair *qp, struct iocb_resource *iores)
 {
        u16 iocbs_used, i;
        u16 exch_used;
-       struct qla_hw_data *ha = qp->vha->hw;
+       struct qla_hw_data *ha = qp->hw;
 
        if (!ql2xenforce_iocb_limit) {
                iores->res_type = RESOURCE_NONE;
@@ -428,15 +429,69 @@ qla_get_fw_resources(struct qla_qpair *qp, struct iocb_resource *iores)
                        return -ENOSPC;
                }
        }
+
+       if (ql2xenforce_iocb_limit == 2) {
+               if ((iores->iocb_cnt + atomic_read(&ha->fwres.iocb_used)) >=
+                   ha->fwres.iocb_limit) {
+                       iores->res_type = RESOURCE_NONE;
+                       return -ENOSPC;
+               }
+
+               if (iores->res_type & RESOURCE_EXCH) {
+                       if ((iores->exch_cnt + atomic_read(&ha->fwres.exch_used)) >=
+                           ha->fwres.exch_limit) {
+                               iores->res_type = RESOURCE_NONE;
+                               return -ENOSPC;
+                       }
+               }
+       }
+
 force:
        qp->fwres.iocbs_used += iores->iocb_cnt;
        qp->fwres.exch_used += iores->exch_cnt;
+       if (ql2xenforce_iocb_limit == 2) {
+               atomic_add(iores->iocb_cnt, &ha->fwres.iocb_used);
+               atomic_add(iores->exch_cnt, &ha->fwres.exch_used);
+               iores->res_type |= RESOURCE_HA;
+       }
        return 0;
 }
 
+/*
+ * qla_atomic_dtz() - decrement to zero.
+ * Subtracts @amount from @v atomically; the result is clamped at zero.
+ * @v:  pointer of type atomic_t
+ * @amount: amount to decrement from v
+ */
+static void qla_atomic_dtz(atomic_t *v, int amount)
+{
+       int seen, next;
+       int cur = atomic_read(v);
+
+       do {
+               seen = cur;
+               next = seen - amount;
+               if (unlikely(next < 0))
+                       next = 0;
+
+               /* cmpxchg hands back the value it found in *v */
+               cur = atomic_cmpxchg(v, seen, next);
+       } while (unlikely(cur != seen)); /* raced with another update: retry */
+}
+
 static inline void
 qla_put_fw_resources(struct qla_qpair *qp, struct iocb_resource *iores)
 {
+       struct qla_hw_data *ha = qp->hw;
+
+       if (iores->res_type & RESOURCE_HA) {
+               if (iores->res_type & RESOURCE_IOCB)
+                       qla_atomic_dtz(&ha->fwres.iocb_used, iores->iocb_cnt);
+
+               if (iores->res_type & RESOURCE_EXCH)
+                       qla_atomic_dtz(&ha->fwres.exch_used, iores->exch_cnt);
+       }
+
        if (iores->res_type & RESOURCE_IOCB) {
                if (qp->fwres.iocbs_used >= iores->iocb_cnt) {
                        qp->fwres.iocbs_used -= iores->iocb_cnt;
index 1ee9b7d..df90169 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <scsi/scsi_tcq.h>
 
+static int qla_start_scsi_type6(srb_t *sp);
 /**
  * qla2x00_get_cmd_direction() - Determine control_flag data direction.
  * @sp: SCSI command
@@ -590,8 +591,6 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
        uint16_t tot_dsds)
 {
        struct dsd64 *cur_dsd = NULL, *next_dsd;
-       scsi_qla_host_t *vha;
-       struct qla_hw_data *ha;
        struct scsi_cmnd *cmd;
        struct  scatterlist *cur_seg;
        uint8_t avail_dsds;
@@ -613,9 +612,6 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
                return 0;
        }
 
-       vha = sp->vha;
-       ha = vha->hw;
-
        /* Set transfer direction */
        if (cmd->sc_data_direction == DMA_TO_DEVICE) {
                cmd_pkt->control_flags = cpu_to_le16(CF_WRITE_DATA);
@@ -636,14 +632,13 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt,
                tot_dsds -= avail_dsds;
                dsd_list_len = (avail_dsds + 1) * QLA_DSD_SIZE;
 
-               dsd_ptr = list_first_entry(&ha->gbl_dsd_list,
-                   struct dsd_dma, list);
+               dsd_ptr = list_first_entry(&qpair->dsd_list, struct dsd_dma, list);
                next_dsd = dsd_ptr->dsd_addr;
                list_del(&dsd_ptr->list);
-               ha->gbl_dsd_avail--;
+               qpair->dsd_avail--;
                list_add_tail(&dsd_ptr->list, &ctx->dsd_list);
                ctx->dsd_use_cnt++;
-               ha->gbl_dsd_inuse++;
+               qpair->dsd_inuse++;
 
                if (first_iocb) {
                        first_iocb = 0;
@@ -1722,6 +1717,8 @@ qla24xx_dif_start_scsi(srb_t *sp)
        if (scsi_get_prot_op(cmd) == SCSI_PROT_NORMAL) {
                if (cmd->cmd_len <= 16)
                        return qla24xx_start_scsi(sp);
+               else
+                       return qla_start_scsi_type6(sp);
        }
 
        /* Setup device pointers. */
@@ -2101,6 +2098,8 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp)
        if (scsi_get_prot_op(cmd) == SCSI_PROT_NORMAL) {
                if (cmd->cmd_len <= 16)
                        return qla2xxx_start_scsi_mq(sp);
+               else
+                       return qla_start_scsi_type6(sp);
        }
 
        spin_lock_irqsave(&qpair->qp_lock, flags);
@@ -3368,6 +3367,7 @@ qla82xx_start_scsi(srb_t *sp)
        struct qla_hw_data *ha = vha->hw;
        struct req_que *req = NULL;
        struct rsp_que *rsp = NULL;
+       struct qla_qpair *qpair = sp->qpair;
 
        /* Setup device pointers. */
        reg = &ha->iobase->isp82;
@@ -3416,18 +3416,18 @@ qla82xx_start_scsi(srb_t *sp)
                uint16_t i;
 
                more_dsd_lists = qla24xx_calc_dsd_lists(tot_dsds);
-               if ((more_dsd_lists + ha->gbl_dsd_inuse) >= NUM_DSD_CHAIN) {
+               if ((more_dsd_lists + qpair->dsd_inuse) >= NUM_DSD_CHAIN) {
                        ql_dbg(ql_dbg_io, vha, 0x300d,
                            "Num of DSD list %d is than %d for cmd=%p.\n",
-                           more_dsd_lists + ha->gbl_dsd_inuse, NUM_DSD_CHAIN,
+                           more_dsd_lists + qpair->dsd_inuse, NUM_DSD_CHAIN,
                            cmd);
                        goto queuing_error;
                }
 
-               if (more_dsd_lists <= ha->gbl_dsd_avail)
+               if (more_dsd_lists <= qpair->dsd_avail)
                        goto sufficient_dsds;
                else
-                       more_dsd_lists -= ha->gbl_dsd_avail;
+                       more_dsd_lists -= qpair->dsd_avail;
 
                for (i = 0; i < more_dsd_lists; i++) {
                        dsd_ptr = kzalloc(sizeof(struct dsd_dma), GFP_ATOMIC);
@@ -3447,8 +3447,8 @@ qla82xx_start_scsi(srb_t *sp)
                                    "for cmd=%p.\n", cmd);
                                goto queuing_error;
                        }
-                       list_add_tail(&dsd_ptr->list, &ha->gbl_dsd_list);
-                       ha->gbl_dsd_avail++;
+                       list_add_tail(&dsd_ptr->list, &qpair->dsd_list);
+                       qpair->dsd_avail++;
                }
 
 sufficient_dsds:
@@ -3767,21 +3767,28 @@ qla_nvme_ls(srb_t *sp, struct pt_ls4_request *cmd_pkt)
        nvme = &sp->u.iocb_cmd;
        cmd_pkt->entry_type = PT_LS4_REQUEST;
        cmd_pkt->entry_count = 1;
-       cmd_pkt->control_flags = cpu_to_le16(CF_LS4_ORIGINATOR << CF_LS4_SHIFT);
-
        cmd_pkt->timeout = cpu_to_le16(nvme->u.nvme.timeout_sec);
-       cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id);
        cmd_pkt->vp_index = sp->fcport->vha->vp_idx;
 
+       if (sp->unsol_rsp) {
+               cmd_pkt->control_flags =
+                               cpu_to_le16(CF_LS4_RESPONDER << CF_LS4_SHIFT);
+               cmd_pkt->nport_handle = nvme->u.nvme.nport_handle;
+               cmd_pkt->exchange_address = nvme->u.nvme.exchange_address;
+       } else {
+               cmd_pkt->control_flags =
+                               cpu_to_le16(CF_LS4_ORIGINATOR << CF_LS4_SHIFT);
+               cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id);
+               cmd_pkt->rx_dseg_count = cpu_to_le16(1);
+               cmd_pkt->rx_byte_count = nvme->u.nvme.rsp_len;
+               cmd_pkt->dsd[1].length  = nvme->u.nvme.rsp_len;
+               put_unaligned_le64(nvme->u.nvme.rsp_dma, &cmd_pkt->dsd[1].address);
+       }
+
        cmd_pkt->tx_dseg_count = cpu_to_le16(1);
-       cmd_pkt->tx_byte_count = cpu_to_le32(nvme->u.nvme.cmd_len);
-       cmd_pkt->dsd[0].length = cpu_to_le32(nvme->u.nvme.cmd_len);
+       cmd_pkt->tx_byte_count = nvme->u.nvme.cmd_len;
+       cmd_pkt->dsd[0].length = nvme->u.nvme.cmd_len;
        put_unaligned_le64(nvme->u.nvme.cmd_dma, &cmd_pkt->dsd[0].address);
-
-       cmd_pkt->rx_dseg_count = cpu_to_le16(1);
-       cmd_pkt->rx_byte_count = cpu_to_le32(nvme->u.nvme.rsp_len);
-       cmd_pkt->dsd[1].length = cpu_to_le32(nvme->u.nvme.rsp_len);
-       put_unaligned_le64(nvme->u.nvme.rsp_dma, &cmd_pkt->dsd[1].address);
 }
 
 static void
@@ -4198,3 +4205,267 @@ queuing_error:
 
        return rval;
 }
+
+/**
+ * qla_start_scsi_type6() - Send a SCSI command to the ISP
+ * @sp: command to send to the ISP
+ *
+ * Runs with the qpair's qp_lock held from entry to every exit. Maps the
+ * command's scatter/gather list, reserves firmware IOCB and exchange
+ * resources, tops up the qpair's pool of DSD lists, builds the FCP_CMND IU
+ * in a buffer taken from ha->fcp_cmnd_dma_pool and posts one request-ring
+ * entry.
+ *
+ * Returns non-zero if a failure occurred, else zero.
+ */
+static int
+qla_start_scsi_type6(srb_t *sp)
+{
+       int             nseg;
+       unsigned long   flags;
+       uint32_t        *clr_ptr;
+       uint32_t        handle;
+       struct cmd_type_6 *cmd_pkt;
+       uint16_t        cnt;
+       uint16_t        req_cnt;
+       uint16_t        tot_dsds;
+       struct req_que *req = NULL;
+       struct rsp_que *rsp;
+       struct scsi_cmnd *cmd = GET_CMD_SP(sp);
+       struct scsi_qla_host *vha = sp->fcport->vha;
+       struct qla_hw_data *ha = vha->hw;
+       struct qla_qpair *qpair = sp->qpair;
+       uint16_t more_dsd_lists = 0;
+       struct dsd_dma *dsd_ptr;
+       uint16_t i;
+       __be32 *fcp_dl;
+       uint8_t additional_cdb_len;
+       struct ct6_dsd *ctx;
+
+       /* Acquire qpair specific lock */
+       spin_lock_irqsave(&qpair->qp_lock, flags);
+
+       /* Setup qpair pointers */
+       req = qpair->req;
+       rsp = qpair->rsp;
+
+       /* So we know we haven't pci_map'ed anything yet */
+       tot_dsds = 0;
+
+       /* Send marker if required */
+       if (vha->marker_needed != 0) {
+               if (__qla2x00_marker(vha, qpair, 0, 0, MK_SYNC_ALL) != QLA_SUCCESS) {
+                       spin_unlock_irqrestore(&qpair->qp_lock, flags);
+                       return QLA_FUNCTION_FAILED;
+               }
+               vha->marker_needed = 0;
+       }
+
+       handle = qla2xxx_get_next_handle(req);
+       if (handle == 0)
+               goto queuing_error;
+
+       /* Map the sg table so we have an accurate count of sg entries needed */
+       if (scsi_sg_count(cmd)) {
+               nseg = dma_map_sg(&ha->pdev->dev, scsi_sglist(cmd),
+                                 scsi_sg_count(cmd), cmd->sc_data_direction);
+               if (unlikely(!nseg))
+                       goto queuing_error;
+       } else {
+               nseg = 0;
+       }
+
+       tot_dsds = nseg;
+
+       /*
+        * Even though the driver only needs one T6 IOCB, the FW still
+        * converts the DSDs to Continuation IOCBs, so account for them when
+        * reserving firmware resources.
+        */
+       req_cnt = qla24xx_calc_iocbs(vha, tot_dsds);
+
+       sp->iores.res_type = RESOURCE_IOCB | RESOURCE_EXCH;
+       sp->iores.exch_cnt = 1;
+       sp->iores.iocb_cnt = req_cnt;
+
+       if (qla_get_fw_resources(sp->qpair, &sp->iores))
+               goto queuing_error;
+
+       more_dsd_lists = qla24xx_calc_dsd_lists(tot_dsds);
+       if ((more_dsd_lists + qpair->dsd_inuse) >= NUM_DSD_CHAIN) {
+               ql_dbg(ql_dbg_io, vha, 0x3028,
+                      "Num of DSD list %d is than %d for cmd=%p.\n",
+                      more_dsd_lists + qpair->dsd_inuse, NUM_DSD_CHAIN, cmd);
+               goto queuing_error;
+       }
+
+       /* Allocate only the DSD lists the qpair does not already have spare. */
+       if (more_dsd_lists <= qpair->dsd_avail)
+               goto sufficient_dsds;
+       else
+               more_dsd_lists -= qpair->dsd_avail;
+
+       for (i = 0; i < more_dsd_lists; i++) {
+               dsd_ptr = kzalloc(sizeof(*dsd_ptr), GFP_ATOMIC);
+               if (!dsd_ptr) {
+                       ql_log(ql_log_fatal, vha, 0x3029,
+                           "Failed to allocate memory for dsd_dma for cmd=%p.\n", cmd);
+                       goto queuing_error;
+               }
+               INIT_LIST_HEAD(&dsd_ptr->list);
+
+               dsd_ptr->dsd_addr = dma_pool_alloc(ha->dl_dma_pool,
+                       GFP_ATOMIC, &dsd_ptr->dsd_list_dma);
+               if (!dsd_ptr->dsd_addr) {
+                       kfree(dsd_ptr);
+                       ql_log(ql_log_fatal, vha, 0x302a,
+                           "Failed to allocate memory for dsd_addr for cmd=%p.\n", cmd);
+                       goto queuing_error;
+               }
+               list_add_tail(&dsd_ptr->list, &qpair->dsd_list);
+               qpair->dsd_avail++;
+       }
+
+sufficient_dsds:
+       /* Exactly one request-ring entry is written below. */
+       req_cnt = 1;
+
+       /* Refresh the cached free-entry count from the out pointer if low. */
+       if (req->cnt < (req_cnt + 2)) {
+               if (IS_SHADOW_REG_CAPABLE(ha)) {
+                       cnt = *req->out_ptr;
+               } else {
+                       cnt = (uint16_t)rd_reg_dword_relaxed(req->req_q_out);
+                       if (qla2x00_check_reg16_for_disconnect(vha, cnt))
+                               goto queuing_error;
+               }
+
+               if (req->ring_index < cnt)
+                       req->cnt = cnt - req->ring_index;
+               else
+                       req->cnt = req->length - (req->ring_index - cnt);
+               if (req->cnt < (req_cnt + 2))
+                       goto queuing_error;
+       }
+
+       ctx = &sp->u.scmd.ct6_ctx;
+
+       memset(ctx, 0, sizeof(struct ct6_dsd));
+       ctx->fcp_cmnd = dma_pool_zalloc(ha->fcp_cmnd_dma_pool,
+               GFP_ATOMIC, &ctx->fcp_cmnd_dma);
+       if (!ctx->fcp_cmnd) {
+               ql_log(ql_log_fatal, vha, 0x3031,
+                   "Failed to allocate fcp_cmnd for cmd=%p.\n", cmd);
+               goto queuing_error;
+       }
+
+       /* Initialize the DSD list and dma handle */
+       INIT_LIST_HEAD(&ctx->dsd_list);
+       ctx->dsd_use_cnt = 0;
+
+       if (cmd->cmd_len > 16) {
+               additional_cdb_len = cmd->cmd_len - 16;
+               if (cmd->cmd_len % 4 ||
+                   cmd->cmd_len > QLA_CDB_BUF_SIZE) {
+                       /*
+                        * A SCSI command bigger than 16 bytes must be a
+                        * multiple of 4 bytes long and must not exceed
+                        * QLA_CDB_BUF_SIZE; reject it otherwise.
+                        */
+                       ql_log(ql_log_warn, vha, 0x3033,
+                           "scsi cmd len %d not multiple of 4 for cmd=%p.\n",
+                           cmd->cmd_len, cmd);
+                       goto queuing_error_fcp_cmnd;
+               }
+               /* NOTE(review): 12 is presumably the FCP_CMND bytes ahead of the CDB and 4 the FCP_DL - confirm against struct fcp_cmnd. */
+               ctx->fcp_cmnd_len = 12 + cmd->cmd_len + 4;
+       } else {
+               additional_cdb_len = 0;
+               ctx->fcp_cmnd_len = 12 + 16 + 4;
+       }
+
+       /* Build command packet. */
+       req->current_outstanding_cmd = handle;
+       req->outstanding_cmds[handle] = sp;
+       sp->handle = handle;
+       cmd->host_scribble = (unsigned char *)(unsigned long)handle;
+       req->cnt -= req_cnt;
+
+       cmd_pkt = (struct cmd_type_6 *)req->ring_ptr;
+       cmd_pkt->handle = make_handle(req->id, handle);
+
+       /* tagged queuing modifier -- default is TSK_SIMPLE (0). */
+       /* Zero everything after the first 8 bytes of the ring entry. */
+       clr_ptr = (uint32_t *)cmd_pkt + 2;
+       memset(clr_ptr, 0, REQUEST_ENTRY_SIZE - 8);
+       cmd_pkt->dseg_count = cpu_to_le16(tot_dsds);
+
+       /* Set NPORT-ID and LUN number */
+       cmd_pkt->nport_handle = cpu_to_le16(sp->fcport->loop_id);
+       cmd_pkt->port_id[0] = sp->fcport->d_id.b.al_pa;
+       cmd_pkt->port_id[1] = sp->fcport->d_id.b.area;
+       cmd_pkt->port_id[2] = sp->fcport->d_id.b.domain;
+       cmd_pkt->vp_index = sp->vha->vp_idx;
+
+       /* Build IOCB segments */
+       qla24xx_build_scsi_type_6_iocbs(sp, cmd_pkt, tot_dsds);
+
+       int_to_scsilun(cmd->device->lun, &cmd_pkt->lun);
+       host_to_fcp_swap((uint8_t *)&cmd_pkt->lun, sizeof(cmd_pkt->lun));
+
+       /* build FCP_CMND IU */
+       int_to_scsilun(cmd->device->lun, &ctx->fcp_cmnd->lun);
+       ctx->fcp_cmnd->additional_cdb_len = additional_cdb_len;
+
+       /* Low two bits carry the data direction: 1 = to device, 2 = from device. */
+       if (cmd->sc_data_direction == DMA_TO_DEVICE)
+               ctx->fcp_cmnd->additional_cdb_len |= 1;
+       else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+               ctx->fcp_cmnd->additional_cdb_len |= 2;
+
+       /* Populate the FCP_PRIO. */
+       if (ha->flags.fcp_prio_enabled)
+               ctx->fcp_cmnd->task_attribute |=
+                   sp->fcport->fcp_prio << 3;
+
+       memcpy(ctx->fcp_cmnd->cdb, cmd->cmnd, cmd->cmd_len);
+
+       /* FCP_DL (big-endian transfer length) sits right after the CDB bytes. */
+       fcp_dl = (__be32 *)(ctx->fcp_cmnd->cdb + 16 +
+           additional_cdb_len);
+       *fcp_dl = htonl((uint32_t)scsi_bufflen(cmd));
+
+       cmd_pkt->fcp_cmnd_dseg_len = cpu_to_le16(ctx->fcp_cmnd_len);
+       put_unaligned_le64(ctx->fcp_cmnd_dma,
+                          &cmd_pkt->fcp_cmnd_dseg_address);
+
+       sp->flags |= SRB_FCP_CMND_DMA_VALID;
+       cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
+       /* Set total data segment count. */
+       cmd_pkt->entry_count = (uint8_t)req_cnt;
+
+       /* Make the IOCB contents visible before the ring index is advanced. */
+       wmb();
+       /* Adjust ring index. */
+       req->ring_index++;
+       if (req->ring_index == req->length) {
+               req->ring_index = 0;
+               req->ring_ptr = req->ring;
+       } else {
+               req->ring_ptr++;
+       }
+
+       sp->qpair->cmd_cnt++;
+       sp->flags |= SRB_DMA_VALID;
+
+       /* Set chip new ring index. */
+       wrt_reg_dword(req->req_q_in, req->ring_index);
+
+       /* Manage unprocessed RIO/ZIO commands in response queue. */
+       if (vha->flags.process_response_queue &&
+           rsp->ring_ptr->signature != RESPONSE_PROCESSED)
+               qla24xx_process_response_queue(vha, rsp);
+
+       spin_unlock_irqrestore(&qpair->qp_lock, flags);
+
+       return QLA_SUCCESS;
+
+queuing_error_fcp_cmnd:
+       /* Only reached once the FCP_CMND buffer has been allocated. */
+       dma_pool_free(ha->fcp_cmnd_dma_pool, ctx->fcp_cmnd, ctx->fcp_cmnd_dma);
+
+queuing_error:
+       /* tot_dsds is non-zero only after the sg list was DMA-mapped. */
+       if (tot_dsds)
+               scsi_dma_unmap(cmd);
+
+       qla_put_fw_resources(sp->qpair, &sp->iores);
+
+       if (sp->u.scmd.crc_ctx) {
+               mempool_free(sp->u.scmd.crc_ctx, ha->ctx_mempool);
+               sp->u.scmd.crc_ctx = NULL;
+       }
+
+       spin_unlock_irqrestore(&qpair->qp_lock, flags);
+
+       return QLA_FUNCTION_FAILED;
+}
index 1f42a41..e987881 100644 (file)
@@ -56,6 +56,22 @@ const char *const port_state_str[] = {
        [FCS_ONLINE]            = "ONLINE"
 };
 
+#define SFP_DISABLE_LASER_INITIATED    0x15  /* Sub code of 8070 AEN */
+#define SFP_ENABLE_LASER_INITIATED     0x16  /* Sub code of 8070 AEN */
+
+/*
+ * Log the SFP laser state change reported by a temperature-alert AEN.
+ * @mb1 selects the event (SFP_DISABLE_LASER_INITIATED or
+ * SFP_ENABLE_LASER_INITIATED); @mb3 is printed as the temperature and @mb2
+ * as the threshold. Any other @mb1 value logs nothing.
+ */
+static inline void display_Laser_info(scsi_qla_host_t *vha,
+                                     u16 mb1, u16 mb2, u16 mb3)
+{
+       switch (mb1) {
+       case SFP_DISABLE_LASER_INITIATED:
+               ql_log(ql_log_warn, vha, 0xf0a2,
+                      "SFP temperature (%d C) reached/exceeded the threshold (%d C). Laser is disabled.\n",
+                      mb3, mb2);
+               break;
+       case SFP_ENABLE_LASER_INITIATED:
+               ql_log(ql_log_warn, vha, 0xf0a3,
+                      "SFP temperature (%d C) reached normal operating level. Laser is enabled.\n",
+                      mb3);
+               break;
+       default:
+               break;
+       }
+}
+
 static void
 qla24xx_process_abts(struct scsi_qla_host *vha, struct purex_item *pkt)
 {
@@ -823,6 +839,135 @@ qla83xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb)
        }
 }
 
+/**
+ * qla27xx_copy_multiple_pkt() - Copy over purex/purls packets that can
+ * span over multiple IOCBs.
+ * @vha: SCSI driver HA context
+ * @pkt: ELS packet; on return *@pkt points at the last continuation entry
+ *       examined, if any
+ * @rsp: Response queue; its ring_index/ring_ptr are advanced past every
+ *       continuation entry consumed
+ * @is_purls: True, for Unsolicited Received FC-NVMe LS rsp IOCB
+ *            false, for Unsolicited Received ELS IOCB
+ * @byte_order: True, to change the byte ordering of iocb payload
+ *
+ * Allocates a purex item sized for the frame payload, copies the bytes
+ * carried in the first IOCB and then consumes STATUS_CONT_TYPE entries from
+ * the response ring for the remainder. Every consumed entry is stamped
+ * RESPONSE_PROCESSED.
+ *
+ * Return: the filled purex item, or NULL if the item could not be allocated
+ * or the payload could not be copied completely.
+ */
+struct purex_item *
+qla27xx_copy_multiple_pkt(struct scsi_qla_host *vha, void **pkt,
+                         struct rsp_que **rsp, bool is_purls,
+                         bool byte_order)
+{
+       struct purex_entry_24xx *purex = NULL;
+       struct pt_ls4_rx_unsol *purls = NULL;
+       struct rsp_que *rsp_q = *rsp;
+       sts_cont_entry_t *new_pkt;
+       uint16_t no_bytes = 0, total_bytes = 0, pending_bytes = 0;
+       uint16_t buffer_copy_offset = 0, payload_size = 0;
+       uint16_t entry_count, entry_count_remaining;
+       struct purex_item *item;
+       void *iocb_pkt = NULL;
+
+       /* Payload length is the masked frame size minus the ELS header. */
+       if (is_purls) {
+               purls = *pkt;
+               total_bytes = (le16_to_cpu(purls->frame_size) & 0x0FFF) -
+                             PURX_ELS_HEADER_SIZE;
+               entry_count = entry_count_remaining = purls->entry_count;
+               payload_size = sizeof(purls->payload);
+       } else {
+               purex = *pkt;
+               total_bytes = (le16_to_cpu(purex->frame_size) & 0x0FFF) -
+                             PURX_ELS_HEADER_SIZE;
+               entry_count = entry_count_remaining = purex->entry_count;
+               payload_size = sizeof(purex->els_frame_payload);
+       }
+
+       pending_bytes = total_bytes;
+       /* The first IOCB carries at most payload_size bytes. */
+       no_bytes = (pending_bytes > payload_size) ? payload_size :
+                  pending_bytes;
+       ql_dbg(ql_dbg_async, vha, 0x509a,
+              "%s LS, frame_size 0x%x, entry count %d\n",
+              (is_purls ? "PURLS" : "FPIN"), total_bytes, entry_count);
+
+       item = qla24xx_alloc_purex_item(vha, total_bytes);
+       if (!item)
+               return item;
+
+       iocb_pkt = &item->iocb;
+
+       if (is_purls)
+               memcpy(iocb_pkt, &purls->payload[0], no_bytes);
+       else
+               memcpy(iocb_pkt, &purex->els_frame_payload[0], no_bytes);
+       buffer_copy_offset += no_bytes;
+       pending_bytes -= no_bytes;
+       --entry_count_remaining;
+
+       /* The first IOCB is done with; stamp it before touching the ring. */
+       if (is_purls)
+               ((response_t *)purls)->signature = RESPONSE_PROCESSED;
+       else
+               ((response_t *)purex)->signature = RESPONSE_PROCESSED;
+       wmb();
+
+       do {
+               while ((total_bytes > 0) && (entry_count_remaining > 0)) {
+                       /*
+                        * Busy-wait while the entry at ring_ptr still carries
+                        * the processed signature.
+                        */
+                       if (rsp_q->ring_ptr->signature == RESPONSE_PROCESSED) {
+                               ql_dbg(ql_dbg_async, vha, 0x5084,
+                                      "Ran out of IOCBs, partial data 0x%x\n",
+                                      buffer_copy_offset);
+                               cpu_relax();
+                               continue;
+                       }
+
+                       new_pkt = (sts_cont_entry_t *)rsp_q->ring_ptr;
+                       *pkt = new_pkt;
+
+                       /* Leave a non-continuation entry on the ring untouched. */
+                       if (new_pkt->entry_type != STATUS_CONT_TYPE) {
+                               ql_log(ql_log_warn, vha, 0x507a,
+                                      "Unexpected IOCB type, partial data 0x%x\n",
+                                      buffer_copy_offset);
+                               break;
+                       }
+
+                       /* Consume the continuation entry, wrapping at ring end. */
+                       rsp_q->ring_index++;
+                       if (rsp_q->ring_index == rsp_q->length) {
+                               rsp_q->ring_index = 0;
+                               rsp_q->ring_ptr = rsp_q->ring;
+                       } else {
+                               rsp_q->ring_ptr++;
+                       }
+                       no_bytes = (pending_bytes > sizeof(new_pkt->data)) ?
+                               sizeof(new_pkt->data) : pending_bytes;
+                       if ((buffer_copy_offset + no_bytes) <= total_bytes) {
+                               memcpy(((uint8_t *)iocb_pkt + buffer_copy_offset),
+                                      new_pkt->data, no_bytes);
+                               buffer_copy_offset += no_bytes;
+                               pending_bytes -= no_bytes;
+                               --entry_count_remaining;
+                       } else {
+                               /* Clamp the copy to the space left in the item. */
+                               ql_log(ql_log_warn, vha, 0x5044,
+                                      "Attempt to copy more that we got, optimizing..%x\n",
+                                      buffer_copy_offset);
+                               memcpy(((uint8_t *)iocb_pkt + buffer_copy_offset),
+                                      new_pkt->data,
+                                      total_bytes - buffer_copy_offset);
+                       }
+
+                       ((response_t *)new_pkt)->signature = RESPONSE_PROCESSED;
+                       wmb();
+               }
+
+               /* Any leftover bytes or entries mean the frame is incomplete. */
+               if (pending_bytes != 0 || entry_count_remaining != 0) {
+                       ql_log(ql_log_fatal, vha, 0x508b,
+                              "Dropping partial FPIN, underrun bytes = 0x%x, entry cnts 0x%x\n",
+                              total_bytes, entry_count_remaining);
+                       qla24xx_free_purex_item(item);
+                       return NULL;
+               }
+       } while (entry_count_remaining > 0);
+
+       if (byte_order)
+               host_to_fcp_swap((uint8_t *)&item->iocb, total_bytes);
+
+       return item;
+}
+
 int
 qla2x00_is_a_vp_did(scsi_qla_host_t *vha, uint32_t rscn_entry)
 {
@@ -958,7 +1103,7 @@ initialize_purex_header:
        return item;
 }
 
-static void
+void
 qla24xx_queue_purex_item(scsi_qla_host_t *vha, struct purex_item *pkt,
                         void (*process_item)(struct scsi_qla_host *vha,
                                              struct purex_item *pkt))
@@ -1798,6 +1943,8 @@ global_port_update:
                break;
 
        case MBA_TEMPERATURE_ALERT:
+               if (IS_QLA27XX(ha) || IS_QLA28XX(ha))
+                       display_Laser_info(vha, mb[1], mb[2], mb[3]);
                ql_dbg(ql_dbg_async, vha, 0x505e,
                    "TEMPERATURE ALERT: %04x %04x %04x\n", mb[1], mb[2], mb[3]);
                break;
@@ -3811,6 +3958,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
        struct qla_hw_data *ha = vha->hw;
        struct purex_entry_24xx *purex_entry;
        struct purex_item *pure_item;
+       struct pt_ls4_rx_unsol *p;
        u16 rsp_in = 0, cur_ring_index;
        int is_shadow_hba;
 
@@ -3983,7 +4131,19 @@ process_err:
                        qla28xx_sa_update_iocb_entry(vha, rsp->req,
                                (struct sa_update_28xx *)pkt);
                        break;
-
+               case PT_LS4_UNSOL:
+                       p = (void *)pkt;
+                       if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt, rsp_in)) {
+                               rsp->ring_ptr = (response_t *)pkt;
+                               rsp->ring_index = cur_ring_index;
+
+                               ql_dbg(ql_dbg_init, vha, 0x2124,
+                                      "Defer processing UNSOL LS req opcode %#x...\n",
+                                      p->payload[0]);
+                               return;
+                       }
+                       qla2xxx_process_purls_iocb((void **)&pkt, &rsp);
+                       break;
                default:
                        /* Type Not Supported. */
                        ql_dbg(ql_dbg_async, vha, 0x5042,
index b05f930..21ec32b 100644 (file)
@@ -273,7 +273,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
                spin_unlock_irqrestore(&ha->hardware_lock, flags);
 
                wait_time = jiffies;
-               atomic_inc(&ha->num_pend_mbx_stage3);
                if (!wait_for_completion_timeout(&ha->mbx_intr_comp,
                    mcp->tov * HZ)) {
                        ql_dbg(ql_dbg_mbx, vha, 0x117a,
@@ -290,7 +289,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
                                spin_unlock_irqrestore(&ha->hardware_lock,
                                    flags);
                                atomic_dec(&ha->num_pend_mbx_stage2);
-                               atomic_dec(&ha->num_pend_mbx_stage3);
                                rval = QLA_ABORTED;
                                goto premature_exit;
                        }
@@ -302,11 +300,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
                        ha->flags.mbox_busy = 0;
                        spin_unlock_irqrestore(&ha->hardware_lock, flags);
                        atomic_dec(&ha->num_pend_mbx_stage2);
-                       atomic_dec(&ha->num_pend_mbx_stage3);
                        rval = QLA_ABORTED;
                        goto premature_exit;
                }
-               atomic_dec(&ha->num_pend_mbx_stage3);
 
                if (time_after(jiffies, wait_time + 5 * HZ))
                        ql_log(ql_log_warn, vha, 0x1015, "cmd=0x%x, waited %d msecs\n",
index 9941b38..db753d7 100644 (file)
 #include <linux/blk-mq.h>
 
 static struct nvme_fc_port_template qla_nvme_fc_transport;
+static int qla_nvme_ls_reject_iocb(struct scsi_qla_host *vha,
+                                  struct qla_qpair *qp,
+                                  struct qla_nvme_lsrjt_pt_arg *a,
+                                  bool is_xchg_terminate);
+
+/*
+ * Per-exchange context for one unsolicited FC-NVMe LS request. It is
+ * allocated in qla2xxx_process_purls_iocb() and carries the identifiers
+ * needed to answer or terminate the exchange later.
+ */
+struct qla_nvme_unsol_ctx {
+       struct list_head elem;          /* link on fcport->unsol_ctx_head */
+       struct scsi_qla_host *vha;      /* host the request arrived on */
+       struct fc_port *fcport;         /* remote port that sent the request */
+       struct srb *sp;                 /* SRB carrying the LS response, set in xmt_ls_rsp */
+       struct nvmefc_ls_rsp lsrsp;     /* descriptor handed to nvme_fc_rcv_ls_req() */
+       struct nvmefc_ls_rsp *fd_rsp;   /* response whose ->done() runs on SRB release */
+       struct work_struct lsrsp_work;  /* defers the SRB kref_put after completion */
+       struct work_struct abort_work;  /* NOTE(review): not used in the code visible here */
+       __le32 exchange_address;        /* copied verbatim from the received IOCB */
+       __le16 nport_handle;            /* copied verbatim from the received IOCB */
+       __le16 ox_id;                   /* copied verbatim from the received IOCB */
+       int comp_status;                /* 0 or -EINVAL, set by qla_nvme_sp_lsrsp_done() */
+       spinlock_t cmd_lock;            /* guards clearing of the sp <-> uctx back-pointers */
+};
 
 int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport)
 {
@@ -216,6 +236,55 @@ static void qla_nvme_sp_ls_done(srb_t *sp, int res)
        schedule_work(&priv->ls_work);
 }
 
+/*
+ * kref release callback for an LS-response SRB (installed as sp->put_fn by
+ * qla_nvme_xmt_ls_rsp()). Detaches the SRB from its unsolicited context,
+ * unlinks the context, calls the response's done() callback, then frees the
+ * context and releases the SRB. An SRB without a context is simply released.
+ */
+static void qla_nvme_release_lsrsp_cmd_kref(struct kref *kref)
+{
+       struct srb *sp = container_of(kref, struct srb, cmd_kref);
+       struct qla_nvme_unsol_ctx *uctx = sp->priv;
+       struct nvmefc_ls_rsp *fd_rsp;
+       unsigned long flags;
+
+       if (!uctx) {
+               qla2x00_rel_sp(sp);
+               return;
+       }
+
+       /* Break the sp <-> uctx back-pointers under the context lock. */
+       spin_lock_irqsave(&uctx->cmd_lock, flags);
+       uctx->sp = NULL;
+       sp->priv = NULL;
+       spin_unlock_irqrestore(&uctx->cmd_lock, flags);
+
+       fd_rsp = uctx->fd_rsp;
+
+       list_del(&uctx->elem);
+
+       /* done() is invoked before uctx, which embeds the lsrsp, is freed. */
+       fd_rsp->done(fd_rsp);
+       kfree(uctx);
+       qla2x00_rel_sp(sp);
+}
+
+/*
+ * Work handler scheduled by qla_nvme_sp_lsrsp_done(): drops the reference
+ * on the LS-response SRB attached to the context that embeds @work.
+ */
+static void qla_nvme_lsrsp_complete(struct work_struct *work)
+{
+       struct qla_nvme_unsol_ctx *ctx;
+
+       ctx = container_of(work, struct qla_nvme_unsol_ctx, lsrsp_work);
+       kref_put(&ctx->sp->cmd_kref, qla_nvme_release_lsrsp_cmd_kref);
+}
+
+/*
+ * Completion callback for an LS-response SRB. Records the outcome in the
+ * unsolicited context (any non-zero @res becomes -EINVAL) and schedules
+ * qla_nvme_lsrsp_complete() to drop the SRB reference from work context.
+ */
+static void qla_nvme_sp_lsrsp_done(srb_t *sp, int res)
+{
+       struct qla_nvme_unsol_ctx *ctx = sp->priv;
+
+       /* Nothing to complete if the SRB holds no reference any more. */
+       if (WARN_ON_ONCE(kref_read(&sp->cmd_kref) == 0))
+               return;
+
+       ctx->comp_status = res ? -EINVAL : 0;
+       INIT_WORK(&ctx->lsrsp_work, qla_nvme_lsrsp_complete);
+       schedule_work(&ctx->lsrsp_work);
+}
+
 /* it assumed that QPair lock is held. */
 static void qla_nvme_sp_done(srb_t *sp, int res)
 {
@@ -288,6 +357,92 @@ out:
        kref_put(&sp->cmd_kref, sp->put_fn);
 }
 
+/*
+ * qla_nvme_xmt_ls_rsp() - .xmt_ls_rsp transport callback: send the response
+ * to a previously received unsolicited FC-NVMe LS request.
+ * @lport: local port (not used here)
+ * @rport: remote port; its private data supplies the fcport
+ * @fd_resp: LS response descriptor, embedded in a struct qla_nvme_unsol_ctx
+ *
+ * Builds an SRB_NVME_LS SRB flagged as an unsolicited response and starts
+ * it, retrying up to PURLS_RETRY_COUNT times on EAGAIN.
+ *
+ * Return: 0 once the SRB has been started. On failure the exchange is
+ * terminated, the context is unlinked and freed, and a non-zero value
+ * (QLA_FUNCTION_FAILED or -EIO) is returned.
+ */
+static int qla_nvme_xmt_ls_rsp(struct nvme_fc_local_port *lport,
+                              struct nvme_fc_remote_port *rport,
+                              struct nvmefc_ls_rsp *fd_resp)
+{
+       struct qla_nvme_unsol_ctx *uctx = container_of(fd_resp,
+                               struct qla_nvme_unsol_ctx, lsrsp);
+       struct qla_nvme_rport *qla_rport = rport->private;
+       fc_port_t *fcport = qla_rport->fcport;
+       struct scsi_qla_host *vha = uctx->vha;
+       struct qla_hw_data *ha = vha->hw;
+       struct qla_nvme_lsrjt_pt_arg a;
+       struct srb_iocb *nvme;
+       srb_t *sp;
+       int rval = QLA_FUNCTION_FAILED;
+       uint8_t cnt = 0;
+
+       if (!fcport || fcport->deleted)
+               goto out;
+
+       if (!ha->flags.fw_started)
+               goto out;
+
+       /* Alloc SRB structure */
+       sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC);
+       if (!sp)
+               goto out;
+
+       sp->type = SRB_NVME_LS;
+       sp->name = "nvme_ls";
+       sp->done = qla_nvme_sp_lsrsp_done;
+       sp->put_fn = qla_nvme_release_lsrsp_cmd_kref;
+       sp->priv = (void *)uctx;
+       sp->unsol_rsp = 1;
+       uctx->sp = sp;
+       spin_lock_init(&uctx->cmd_lock);
+       nvme = &sp->u.iocb_cmd;
+       uctx->fd_rsp = fd_resp;
+       nvme->u.nvme.desc = fd_resp;
+       nvme->u.nvme.dir = 0;
+       nvme->u.nvme.dl = 0;
+       nvme->u.nvme.timeout_sec = 0;
+       nvme->u.nvme.cmd_dma = fd_resp->rspdma;
+       /*
+        * qla_nvme_ls() copies cmd_len straight into the little-endian IOCB
+        * length fields, so convert the host-order length here.
+        */
+       nvme->u.nvme.cmd_len = cpu_to_le32(fd_resp->rsplen);
+       nvme->u.nvme.rsp_len = 0;
+       nvme->u.nvme.rsp_dma = 0;
+       nvme->u.nvme.exchange_address = uctx->exchange_address;
+       nvme->u.nvme.nport_handle = uctx->nport_handle;
+       nvme->u.nvme.ox_id = uctx->ox_id;
+       /* rsplen is already in host byte order; pass it through unchanged. */
+       dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma,
+                                  fd_resp->rsplen, DMA_TO_DEVICE);
+
+       ql_dbg(ql_dbg_unsol, vha, 0x2122,
+              "Unsol lsreq portid=%06x %8phC exchange_address 0x%x ox_id 0x%x hdl 0x%x\n",
+              fcport->d_id.b24, fcport->port_name, uctx->exchange_address,
+              uctx->ox_id, uctx->nport_handle);
+retry:
+       rval = qla2x00_start_sp(sp);
+       switch (rval) {
+       case QLA_SUCCESS:
+               break;
+       /* NOTE(review): confirm qla2x00_start_sp() reports EAGAIN as a positive value. */
+       case EAGAIN:
+               msleep(PURLS_MSLEEP_INTERVAL);
+               cnt++;
+               if (cnt < PURLS_RETRY_COUNT)
+                       goto retry;
+
+               fallthrough;
+       default:
+               /* ql_log_warn is a log level, so it belongs with ql_log(). */
+               ql_log(ql_log_warn, vha, 0x2123,
+                      "Failed to xmit Unsol ls response = %d\n", rval);
+               rval = -EIO;
+               qla2x00_rel_sp(sp);
+               goto out;
+       }
+
+       return 0;
+out:
+       /* Could not send the response: terminate the exchange instead. */
+       memset((void *)&a, 0, sizeof(a));
+       a.vp_idx = vha->vp_idx;
+       a.nport_handle = uctx->nport_handle;
+       a.xchg_address = uctx->exchange_address;
+       qla_nvme_ls_reject_iocb(vha, ha->base_qpair, &a, true);
+       /* Unlink before freeing so unsol_ctx_head never references freed memory. */
+       list_del(&uctx->elem);
+       kfree(uctx);
+       return rval;
+}
+
 static void qla_nvme_ls_abort(struct nvme_fc_local_port *lport,
     struct nvme_fc_remote_port *rport, struct nvmefc_ls_req *fd)
 {
@@ -355,7 +510,7 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport,
        nvme->u.nvme.timeout_sec = fd->timeout;
        nvme->u.nvme.cmd_dma = fd->rqstdma;
        dma_sync_single_for_device(&ha->pdev->dev, nvme->u.nvme.cmd_dma,
-           fd->rqstlen, DMA_TO_DEVICE);
+           le32_to_cpu(fd->rqstlen), DMA_TO_DEVICE);
 
        rval = qla2x00_start_sp(sp);
        if (rval != QLA_SUCCESS) {
@@ -720,6 +875,7 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = {
        .ls_abort       = qla_nvme_ls_abort,
        .fcp_io         = qla_nvme_post_cmd,
        .fcp_abort      = qla_nvme_fcp_abort,
+       .xmt_ls_rsp     = qla_nvme_xmt_ls_rsp,
        .map_queues     = qla_nvme_map_queues,
        .max_hw_queues  = DEF_NVME_HW_QUEUES,
        .max_sgl_segments = 1024,
@@ -924,3 +1080,247 @@ inline void qla_wait_nvme_release_cmd_kref(srb_t *orig_sp)
                return;
        kref_put(&orig_sp->cmd_kref, orig_sp->put_fn);
 }
+
+/*
+ * Fill @buf with an FC-NVMe LS reject (struct fcnvme_ls_rjt).
+ * @buf: destination buffer, at least sizeof(struct fcnvme_ls_rjt) bytes
+ * @ls_cmd: opcode of the LS request being rejected
+ * @reason: reject reason code
+ * @explanation: reject reason explanation
+ * @vendor: vendor-specific byte
+ */
+static void qla_nvme_fc_format_rjt(void *buf, u8 ls_cmd, u8 reason,
+                                  u8 explanation, u8 vendor)
+{
+       struct fcnvme_ls_rjt *rjt = buf;
+
+       /* w0.ls_cmd holds an LS opcode, not a descriptor tag. */
+       rjt->w0.ls_cmd = FCNVME_LS_RJT;
+       rjt->desc_list_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt));
+
+       /* Request descriptor: echoes the opcode being rejected. */
+       rjt->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST);
+       rjt->rqst.desc_len =
+               fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst));
+       rjt->rqst.w0.ls_cmd = ls_cmd;
+
+       /* Reject descriptor: why the request was refused. */
+       rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT);
+       rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt));
+       rjt->rjt.reason_code = reason;
+       rjt->rjt.reason_explanation = explanation;
+       rjt->rjt.vendor = vendor;
+}
+
+/*
+ * Populate @lsrjt_iocb, a PT_LS4_REQUEST entry, from the values in @a.
+ * One transmit descriptor is always described; the receive side is given
+ * zero length and a zero segment count. @vha is not used.
+ */
+static void qla_nvme_lsrjt_pt_iocb(struct scsi_qla_host *vha,
+                                  struct pt_ls4_request *lsrjt_iocb,
+                                  struct qla_nvme_lsrjt_pt_arg *a)
+{
+       __le32 tx_len = cpu_to_le32(a->tx_byte_count);
+
+       /* Entry header */
+       lsrjt_iocb->entry_type = PT_LS4_REQUEST;
+       lsrjt_iocb->entry_count = 1;
+       lsrjt_iocb->sys_define = 0;
+       lsrjt_iocb->entry_status = 0;
+       lsrjt_iocb->handle = QLA_SKIP_HANDLE;
+
+       /* Exchange addressing and control flags */
+       lsrjt_iocb->vp_index = a->vp_idx;
+       lsrjt_iocb->nport_handle = a->nport_handle;
+       lsrjt_iocb->exchange_address = a->xchg_address;
+       lsrjt_iocb->control_flags = cpu_to_le16(a->control_flags);
+
+       /* Transmit side: a single descriptor */
+       lsrjt_iocb->tx_dseg_count = cpu_to_le16(1);
+       lsrjt_iocb->tx_byte_count = tx_len;
+       lsrjt_iocb->dsd[0].length = tx_len;
+       put_unaligned_le64(a->tx_addr, &lsrjt_iocb->dsd[0].address);
+
+       /* Receive side: nothing expected back */
+       lsrjt_iocb->rx_dseg_count = 0;
+       lsrjt_iocb->rx_byte_count = 0;
+       lsrjt_iocb->dsd[1].length = 0;
+       put_unaligned_le64(a->rx_addr, &lsrjt_iocb->dsd[1].address);
+}
+
+/*
+ * qla_nvme_ls_reject_iocb() - Queue a PT_LS4_REQUEST IOCB that either sends
+ * an FC-NVMe LS reject or terminates the exchange.
+ * @vha: SCSI qla host
+ * @qp: queue pair whose request queue receives the IOCB
+ * @a: exchange identifiers and reject details; tx_byte_count, tx_addr and
+ *     control_flags are filled in here
+ * @is_xchg_terminate: true to terminate the exchange without a payload,
+ *                     false to send a reject built in vha->hw->lsrjt
+ *
+ * Return: 0 on success, QLA_FUNCTION_FAILED if no IOCB could be allocated.
+ */
+static int
+qla_nvme_ls_reject_iocb(struct scsi_qla_host *vha, struct qla_qpair *qp,
+                       struct qla_nvme_lsrjt_pt_arg *a, bool is_xchg_terminate)
+{
+       struct pt_ls4_request *lsrjt_iocb;
+
+       lsrjt_iocb = __qla2x00_alloc_iocbs(qp, NULL);
+       if (!lsrjt_iocb) {
+               ql_log(ql_log_warn, vha, 0x210e,
+                      "qla2x00_alloc_iocbs failed.\n");
+               return QLA_FUNCTION_FAILED;
+       }
+
+       if (!is_xchg_terminate) {
+               /* Build the reject payload in the per-adapter lsrjt buffer. */
+               qla_nvme_fc_format_rjt((void *)vha->hw->lsrjt.c, a->opcode,
+                                      a->reason, a->explanation, 0);
+
+               a->tx_byte_count = sizeof(struct fcnvme_ls_rjt);
+               a->tx_addr = vha->hw->lsrjt.cdma;
+               a->control_flags = CF_LS4_RESPONDER << CF_LS4_SHIFT;
+
+               ql_dbg(ql_dbg_unsol, vha, 0x211f,
+                      "Sending nvme fc ls reject ox_id %04x op %04x\n",
+                      a->ox_id, a->opcode);
+               ql_dump_buffer(ql_dbg_unsol + ql_dbg_verbose, vha, 0x210f,
+                              vha->hw->lsrjt.c, sizeof(*vha->hw->lsrjt.c));
+       } else {
+               /* Terminate only: no payload is transmitted. */
+               a->tx_byte_count = 0;
+               a->control_flags = CF_LS4_RESPONDER_TERM << CF_LS4_SHIFT;
+               ql_dbg(ql_dbg_unsol, vha, 0x2110,
+                      "Terminate nvme ls xchg 0x%x\n", a->xchg_address);
+       }
+
+       qla_nvme_lsrjt_pt_iocb(vha, lsrjt_iocb, a);
+       /* flush iocb to mem before notifying hw doorbell */
+       wmb();
+       qla2x00_start_iocbs(vha, qp->req);
+       return 0;
+}
+
+/*
+ * qla2xxx_process_purls_pkt() - Pass-up Unsolicited
+ * Received FC-NVMe Link Service pkt to nvme_fc_rcv_ls_req().
+ * The LLDD provides the memory for the response descriptor (the lsrsp
+ * embedded in the unsolicited context), which is used to reference the
+ * exchange corresponding to the LS when issuing an LS response.
+ *
+ * If the transport does not accept the request, or CONFIG_NVME_FC is not
+ * enabled, the exchange is terminated and the context is unlinked and
+ * freed here.
+ *
+ * @vha: SCSI qla host
+ * @item: ptr to purex_item
+ */
+static void
+qla2xxx_process_purls_pkt(struct scsi_qla_host *vha, struct purex_item *item)
+{
+       struct qla_nvme_unsol_ctx *uctx = item->purls_context;
+       struct qla_nvme_lsrjt_pt_arg a;
+       /* Stays non-zero, i.e. "failed", when CONFIG_NVME_FC is not enabled. */
+       int ret = 1;
+
+#if (IS_ENABLED(CONFIG_NVME_FC))
+       ret = nvme_fc_rcv_ls_req(uctx->fcport->nvme_remote_port, &uctx->lsrsp,
+                                &item->iocb, item->size);
+#endif
+       if (ret) {
+               ql_dbg(ql_dbg_unsol, vha, 0x2125, "NVMe transport ls_req failed\n");
+               memset((void *)&a, 0, sizeof(a));
+               a.vp_idx = vha->vp_idx;
+               a.nport_handle = uctx->nport_handle;
+               a.xchg_address = uctx->exchange_address;
+               qla_nvme_ls_reject_iocb(vha, vha->hw->base_qpair, &a, true);
+               list_del(&uctx->elem);
+               kfree(uctx);
+       }
+}
+
+/*
+ * Find the host whose vp_idx equals @vp_index.
+ *
+ * Index 0 on an adapter with no virtual hosts resolves directly to the
+ * PCI driver data. Otherwise ha->vp_list is searched under vport_slock.
+ * Returns NULL when no entry matches.
+ */
+static scsi_qla_host_t *
+qla2xxx_get_vha_from_vp_idx(struct qla_hw_data *ha, uint16_t vp_index)
+{
+       scsi_qla_host_t *cur, *next, *match = NULL;
+       unsigned long irq_flags;
+
+       if (vp_index == 0 && ha->num_vhosts == 0)
+               return pci_get_drvdata(ha->pdev);
+
+       spin_lock_irqsave(&ha->vport_slock, irq_flags);
+       list_for_each_entry_safe(cur, next, &ha->vp_list, list) {
+               if (cur->vp_idx == vp_index) {
+                       match = cur;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&ha->vport_slock, irq_flags);
+
+       return match;
+}
+
+/*
+ * qla2xxx_process_purls_iocb() - Handle an unsolicited FC-NVMe LS request
+ * IOCB taken from the response queue.
+ * @pkt: in/out pointer to the current response-queue entry
+ * @rsp: in/out pointer to the response queue
+ *
+ * Resolves the host and remote port, copies the (possibly multi-IOCB)
+ * payload into a purex item, allocates an unsolicited context that records
+ * the exchange identifiers, and queues the item for
+ * qla2xxx_process_purls_pkt(). On any failure after the host is resolved an
+ * LS reject is sent and the IOCB is consumed.
+ */
+void qla2xxx_process_purls_iocb(void **pkt, struct rsp_que **rsp)
+{
+       struct nvme_fc_remote_port *rport;
+       struct qla_nvme_rport *qla_rport;
+       struct qla_nvme_lsrjt_pt_arg a;
+       struct pt_ls4_rx_unsol *p = *pkt;
+       struct qla_nvme_unsol_ctx *uctx;
+       struct rsp_que *rsp_q = *rsp;
+       struct qla_hw_data *ha;
+       scsi_qla_host_t *vha;
+       fc_port_t *fcport = NULL;
+       struct purex_item *item;
+       port_id_t d_id = {0};
+       port_id_t id = {0};
+       u8 *opcode;
+       bool xmt_reject = false;
+
+       ha = rsp_q->hw;
+
+       vha = qla2xxx_get_vha_from_vp_idx(ha, p->vp_index);
+       if (!vha) {
+               ql_log(ql_log_warn, NULL, 0x2110, "Invalid vp index %d\n", p->vp_index);
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       /* Capture what a reject would need before the IOCB is consumed. */
+       memset((void *)&a, 0, sizeof(a));
+       opcode = (u8 *)&p->payload[0];
+       a.opcode = opcode[3];
+       a.vp_idx = p->vp_index;
+       a.nport_handle = p->nport_handle;
+       a.ox_id = p->ox_id;
+       a.xchg_address = p->exchange_address;
+
+       id.b.domain = p->s_id.domain;
+       id.b.area   = p->s_id.area;
+       id.b.al_pa  = p->s_id.al_pa;
+       d_id.b.domain = p->d_id[2];
+       d_id.b.area   = p->d_id[1];
+       d_id.b.al_pa  = p->d_id[0];
+
+       fcport = qla2x00_find_fcport_by_nportid(vha, &id, 0);
+       if (!fcport) {
+               ql_dbg(ql_dbg_unsol, vha, 0x211e,
+                      "Failed to find sid=%06x did=%06x\n",
+                      id.b24, d_id.b24);
+               a.reason = FCNVME_RJT_RC_INV_ASSOC;
+               a.explanation = FCNVME_RJT_EXP_NONE;
+               xmt_reject = true;
+               goto out;
+       }
+       /* NOTE(review): assumes nvme_remote_port is non-NULL for this fcport - confirm. */
+       rport = fcport->nvme_remote_port;
+       qla_rport = rport->private;
+
+       item = qla27xx_copy_multiple_pkt(vha, pkt, rsp, true, false);
+       if (!item) {
+               a.reason = FCNVME_RJT_RC_LOGIC;
+               a.explanation = FCNVME_RJT_EXP_NONE;
+               xmt_reject = true;
+               goto out;
+       }
+
+       uctx = kzalloc(sizeof(*uctx), GFP_ATOMIC);
+       if (!uctx) {
+               ql_log(ql_log_info, vha, 0x2126, "Failed allocate memory\n");
+               a.reason = FCNVME_RJT_RC_LOGIC;
+               a.explanation = FCNVME_RJT_EXP_NONE;
+               xmt_reject = true;
+               /*
+                * Release through the purex helper, as
+                * qla27xx_copy_multiple_pkt() does for the same item.
+                */
+               qla24xx_free_purex_item(item);
+               goto out;
+       }
+
+       uctx->vha = vha;
+       uctx->fcport = fcport;
+       uctx->exchange_address = p->exchange_address;
+       uctx->nport_handle = p->nport_handle;
+       uctx->ox_id = p->ox_id;
+       qla_rport->uctx = uctx;
+       INIT_LIST_HEAD(&uctx->elem);
+       list_add_tail(&uctx->elem, &fcport->unsol_ctx_head);
+       item->purls_context = (void *)uctx;
+
+       ql_dbg(ql_dbg_unsol, vha, 0x2121,
+              "PURLS OP[%01x] size %d xchg addr 0x%x portid %06x\n",
+              item->iocb.iocb[3], item->size, uctx->exchange_address,
+              fcport->d_id.b24);
+       /* +48    0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+        * ----- -----------------------------------------------
+        * 0000: 00 00 00 05 28 00 00 00 07 00 00 00 08 00 00 00
+        * 0010: ab ec 0f cc 00 00 8d 7d 05 00 00 00 10 00 00 00
+        * 0020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+        */
+       ql_dump_buffer(ql_dbg_unsol + ql_dbg_verbose, vha, 0x2120,
+                      &item->iocb, item->size);
+
+       qla24xx_queue_purex_item(vha, item, qla2xxx_process_purls_pkt);
+out:
+       if (xmt_reject) {
+               qla_nvme_ls_reject_iocb(vha, (*rsp)->qpair, &a, false);
+               /* NOTE(review): on paths where the copy already consumed the IOCBs, confirm this second consume is intended. */
+               __qla_consume_iocb(vha, pkt, rsp);
+       }
+}
index d299478..a253ac5 100644 (file)
@@ -21,6 +21,7 @@
 #define Q2T_NVME_NUM_TAGS 2048
 #define QLA_MAX_FC_SEGMENTS 64
 
+struct qla_nvme_unsol_ctx;
 struct scsi_qla_host;
 struct qla_hw_data;
 struct req_que;
@@ -37,6 +38,7 @@ struct nvme_private {
 
 struct qla_nvme_rport {
        struct fc_port *fcport;
+       struct qla_nvme_unsol_ctx *uctx;
 };
 
 #define COMMAND_NVME    0x88            /* Command Type FC-NVMe IOCB */
@@ -75,6 +77,9 @@ struct cmd_nvme {
        struct dsd64 nvme_dsd;
 };
 
+#define PURLS_MSLEEP_INTERVAL  1
+#define PURLS_RETRY_COUNT      5
+
 #define PT_LS4_REQUEST 0x89    /* Link Service pass-through IOCB (request) */
 struct pt_ls4_request {
        uint8_t entry_type;
@@ -118,21 +123,19 @@ struct pt_ls4_rx_unsol {
        __le32  exchange_address;
        uint8_t d_id[3];
        uint8_t r_ctl;
-       be_id_t s_id;
+       le_id_t s_id;
        uint8_t cs_ctl;
        uint8_t f_ctl[3];
        uint8_t type;
        __le16  seq_cnt;
        uint8_t df_ctl;
        uint8_t seq_id;
-       __le16  rx_id;
-       __le16  ox_id;
-       __le32  param;
-       __le32  desc0;
+       __le16 rx_id;
+       __le16 ox_id;
+       __le32  desc0;
 #define PT_LS4_PAYLOAD_OFFSET 0x2c
 #define PT_LS4_FIRST_PACKET_LEN 20
-       __le32  desc_len;
-       __le32  payload[3];
+       __le32 payload[5];
 };
 
 /*
index 6dc80c8..5d1bdc1 100644 (file)
@@ -857,7 +857,9 @@ struct fcp_cmnd {
        uint8_t task_attribute;
        uint8_t task_management;
        uint8_t additional_cdb_len;
-       uint8_t cdb[260]; /* 256 for CDB len and 4 for FCP_DL */
+#define QLA_CDB_BUF_SIZE  256
+#define QLA_FCP_DL_SIZE   4
+       uint8_t cdb[QLA_CDB_BUF_SIZE + QLA_FCP_DL_SIZE]; /* 256 for CDB len and 4 for FCP_DL */
 };
 
 struct dsd_dma {
index 03bc3a0..50db082 100644 (file)
@@ -44,10 +44,11 @@ module_param(ql2xfulldump_on_mpifail, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ql2xfulldump_on_mpifail,
                 "Set this to take full dump on MPI hang.");
 
-int ql2xenforce_iocb_limit = 1;
+int ql2xenforce_iocb_limit = 2;
 module_param(ql2xenforce_iocb_limit, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ql2xenforce_iocb_limit,
-                "Enforce IOCB throttling, to avoid FW congestion. (default: 1)");
+                "Enforce IOCB throttling, to avoid FW congestion. (default: 2) "
+                "1: track usage per queue, 2: track usage per adapter");
 
 /*
  * CT6 CTX allocation cache
@@ -432,6 +433,7 @@ static void qla_init_base_qpair(struct scsi_qla_host *vha, struct req_que *req,
        ha->base_qpair->msix = &ha->msix_entries[QLA_MSIX_RSP_Q];
        ha->base_qpair->srb_mempool = ha->srb_mempool;
        INIT_LIST_HEAD(&ha->base_qpair->hints_list);
+       INIT_LIST_HEAD(&ha->base_qpair->dsd_list);
        ha->base_qpair->enable_class_2 = ql2xenableclass2;
        /* init qpair to this cpu. Will adjust at run time. */
        qla_cpu_update(rsp->qpair, raw_smp_processor_id());
@@ -750,9 +752,9 @@ void qla2x00_sp_free_dma(srb_t *sp)
 
                dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
                    ctx1->fcp_cmnd_dma);
-               list_splice(&ctx1->dsd_list, &ha->gbl_dsd_list);
-               ha->gbl_dsd_inuse -= ctx1->dsd_use_cnt;
-               ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
+               list_splice(&ctx1->dsd_list, &sp->qpair->dsd_list);
+               sp->qpair->dsd_inuse -= ctx1->dsd_use_cnt;
+               sp->qpair->dsd_avail += ctx1->dsd_use_cnt;
        }
 
        if (sp->flags & SRB_GOT_BUF)
@@ -836,9 +838,9 @@ void qla2xxx_qpair_sp_free_dma(srb_t *sp)
 
                dma_pool_free(ha->fcp_cmnd_dma_pool, ctx1->fcp_cmnd,
                    ctx1->fcp_cmnd_dma);
-               list_splice(&ctx1->dsd_list, &ha->gbl_dsd_list);
-               ha->gbl_dsd_inuse -= ctx1->dsd_use_cnt;
-               ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
+               list_splice(&ctx1->dsd_list, &sp->qpair->dsd_list);
+               sp->qpair->dsd_inuse -= ctx1->dsd_use_cnt;
+               sp->qpair->dsd_avail += ctx1->dsd_use_cnt;
                sp->flags &= ~SRB_FCP_CMND_DMA_VALID;
        }
 
@@ -3007,7 +3009,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
        ha->max_exchg = FW_MAX_EXCHANGES_CNT;
        atomic_set(&ha->num_pend_mbx_stage1, 0);
        atomic_set(&ha->num_pend_mbx_stage2, 0);
-       atomic_set(&ha->num_pend_mbx_stage3, 0);
        atomic_set(&ha->zio_threshold, DEFAULT_ZIO_THRESHOLD);
        ha->last_zio_threshold = DEFAULT_ZIO_THRESHOLD;
        INIT_LIST_HEAD(&ha->tmf_pending);
@@ -3288,6 +3289,13 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
        host->max_id = ha->max_fibre_devices;
        host->cmd_per_lun = 3;
        host->unique_id = host->host_no;
+
+       if (ql2xenabledif && ql2xenabledif != 2) {
+               ql_log(ql_log_warn, base_vha, 0x302d,
+                      "Invalid value for ql2xenabledif, resetting it to default (2)\n");
+               ql2xenabledif = 2;
+       }
+
        if (IS_T10_PI_CAPABLE(ha) && ql2xenabledif)
                host->max_cmd_len = 32;
        else
@@ -3524,8 +3532,6 @@ skip_dpc:
                        base_vha->flags.difdix_supported = 1;
                        ql_dbg(ql_dbg_init, base_vha, 0x00f1,
                            "Registering for DIF/DIX type 1 and 3 protection.\n");
-                       if (ql2xenabledif == 1)
-                               prot = SHOST_DIX_TYPE0_PROTECTION;
                        if (ql2xprotmask)
                                scsi_host_set_prot(host, ql2xprotmask);
                        else
@@ -4402,7 +4408,6 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
                           "sf_init_cb=%p.\n", ha->sf_init_cb);
        }
 
-       INIT_LIST_HEAD(&ha->gbl_dsd_list);
 
        /* Get consistent memory allocated for Async Port-Database. */
        if (!IS_FWI2_CAPABLE(ha)) {
@@ -4457,8 +4462,9 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
 
        ha->elsrej.size = sizeof(struct fc_els_ls_rjt) + 16;
        ha->elsrej.c = dma_alloc_coherent(&ha->pdev->dev,
-           ha->elsrej.size, &ha->elsrej.cdma, GFP_KERNEL);
-
+                                         ha->elsrej.size,
+                                         &ha->elsrej.cdma,
+                                         GFP_KERNEL);
        if (!ha->elsrej.c) {
                ql_dbg_pci(ql_dbg_init, ha->pdev, 0xffff,
                    "Alloc failed for els reject cmd.\n");
@@ -4467,8 +4473,21 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
        ha->elsrej.c->er_cmd = ELS_LS_RJT;
        ha->elsrej.c->er_reason = ELS_RJT_LOGIC;
        ha->elsrej.c->er_explan = ELS_EXPL_UNAB_DATA;
+
+       ha->lsrjt.size = sizeof(struct fcnvme_ls_rjt);
+       ha->lsrjt.c = dma_alloc_coherent(&ha->pdev->dev, ha->lsrjt.size,
+                       &ha->lsrjt.cdma, GFP_KERNEL);
+       if (!ha->lsrjt.c) {
+               ql_dbg_pci(ql_dbg_init, ha->pdev, 0xffff,
+                          "Alloc failed for nvme fc reject cmd.\n");
+               goto fail_lsrjt;
+       }
+
        return 0;
 
+fail_lsrjt:
+       dma_free_coherent(&ha->pdev->dev, ha->elsrej.size,
+                         ha->elsrej.c, ha->elsrej.cdma);
 fail_elsrej:
        dma_pool_destroy(ha->purex_dma_pool);
 fail_flt:
@@ -4934,18 +4953,16 @@ qla2x00_mem_free(struct qla_hw_data *ha)
        ha->gid_list = NULL;
        ha->gid_list_dma = 0;
 
-       if (IS_QLA82XX(ha)) {
-               if (!list_empty(&ha->gbl_dsd_list)) {
-                       struct dsd_dma *dsd_ptr, *tdsd_ptr;
-
-                       /* clean up allocated prev pool */
-                       list_for_each_entry_safe(dsd_ptr,
-                               tdsd_ptr, &ha->gbl_dsd_list, list) {
-                               dma_pool_free(ha->dl_dma_pool,
-                               dsd_ptr->dsd_addr, dsd_ptr->dsd_list_dma);
-                               list_del(&dsd_ptr->list);
-                               kfree(dsd_ptr);
-                       }
+       if (!list_empty(&ha->base_qpair->dsd_list)) {
+               struct dsd_dma *dsd_ptr, *tdsd_ptr;
+
+               /* clean up allocated prev pool */
+               list_for_each_entry_safe(dsd_ptr, tdsd_ptr,
+                                        &ha->base_qpair->dsd_list, list) {
+                       dma_pool_free(ha->dl_dma_pool, dsd_ptr->dsd_addr,
+                                     dsd_ptr->dsd_list_dma);
+                       list_del(&dsd_ptr->list);
+                       kfree(dsd_ptr);
                }
        }
 
@@ -5000,6 +5017,12 @@ qla2x00_mem_free(struct qla_hw_data *ha)
                ha->elsrej.c = NULL;
        }
 
+       if (ha->lsrjt.c) {
+               dma_free_coherent(&ha->pdev->dev, ha->lsrjt.size, ha->lsrjt.c,
+                                 ha->lsrjt.cdma);
+               ha->lsrjt.c = NULL;
+       }
+
        ha->init_cb = NULL;
        ha->init_cb_dma = 0;
 
index 81bdf6b..d903563 100644 (file)
@@ -6,9 +6,9 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "10.02.08.500-k"
+#define QLA2XXX_VERSION      "10.02.09.100-k"
 
 #define QLA_DRIVER_MAJOR_VER   10
 #define QLA_DRIVER_MINOR_VER   2
-#define QLA_DRIVER_PATCH_VER   8
-#define QLA_DRIVER_BETA_VER    500
+#define QLA_DRIVER_PATCH_VER   9
+#define QLA_DRIVER_BETA_VER    100
index 217b70c..f795848 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/seq_file.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_dbg.h>
+#include <scsi/scsi_host.h>
 #include "scsi_debugfs.h"
 
 #define SCSI_CMD_FLAG_NAME(name)[const_ilog2(SCMD_##name)] = #name
@@ -33,14 +34,33 @@ static int scsi_flags_show(struct seq_file *m, const unsigned long flags,
 
 void scsi_show_rq(struct seq_file *m, struct request *rq)
 {
-       struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+       struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq), *cmd2;
+       struct Scsi_Host *shost = cmd->device->host;
        int alloc_ms = jiffies_to_msecs(jiffies - cmd->jiffies_at_alloc);
        int timeout_ms = jiffies_to_msecs(rq->timeout);
+       const char *list_info = NULL;
        char buf[80] = "(?)";
 
+       spin_lock_irq(shost->host_lock);
+       list_for_each_entry(cmd2, &shost->eh_abort_list, eh_entry) {
+               if (cmd == cmd2) {
+                       list_info = "on eh_abort_list";
+                       goto unlock;
+               }
+       }
+       list_for_each_entry(cmd2, &shost->eh_cmd_q, eh_entry) {
+               if (cmd == cmd2) {
+                       list_info = "on eh_cmd_q";
+                       goto unlock;
+               }
+       }
+unlock:
+       spin_unlock_irq(shost->host_lock);
+
        __scsi_format_command(buf, sizeof(buf), cmd->cmnd, cmd->cmd_len);
-       seq_printf(m, ", .cmd=%s, .retries=%d, .result = %#x, .flags=", buf,
-                  cmd->retries, cmd->result);
+       seq_printf(m, ", .cmd=%s, .retries=%d, .allowed=%d, .result = %#x, %s%s.flags=",
+                  buf, cmd->retries, cmd->allowed, cmd->result,
+                  list_info ? : "", list_info ? ", " : "");
        scsi_flags_show(m, cmd->flags, scsi_cmd_flags,
                        ARRAY_SIZE(scsi_cmd_flags));
        seq_printf(m, ", .timeout=%d.%03d, allocated %d.%03d s ago",
index 5917694..c2f647a 100644 (file)
@@ -2454,7 +2454,7 @@ static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt)
                envp[idx++] = "SDEV_MEDIA_CHANGE=1";
                break;
        case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
-               scsi_rescan_device(&sdev->sdev_gendev);
+               scsi_rescan_device(sdev);
                envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED";
                break;
        case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
index 1188555..3f0dfb9 100644 (file)
@@ -137,7 +137,6 @@ extern int scsi_complete_async_scans(void);
 extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int,
                                   unsigned int, u64, enum scsi_scan_mode);
 extern void scsi_forget_host(struct Scsi_Host *);
-extern void scsi_rescan_device(struct device *);
 
 /* scsi_sysctl.c */
 #ifdef CONFIG_SYSCTL
index aa13feb..52014b2 100644 (file)
@@ -1619,9 +1619,9 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
 }
 EXPORT_SYMBOL(scsi_add_device);
 
-void scsi_rescan_device(struct device *dev)
+void scsi_rescan_device(struct scsi_device *sdev)
 {
-       struct scsi_device *sdev = to_scsi_device(dev);
+       struct device *dev = &sdev->sdev_gendev;
 
        device_lock(dev);
 
index 6031767..24f6eef 100644 (file)
@@ -747,7 +747,7 @@ static ssize_t
 store_rescan_field (struct device *dev, struct device_attribute *attr,
                    const char *buf, size_t count)
 {
-       scsi_rescan_device(dev);
+       scsi_rescan_device(to_scsi_device(dev));
        return count;
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
@@ -840,7 +840,7 @@ store_state_field(struct device *dev, struct device_attribute *attr,
                 * waiting for pending I/O to finish.
                 */
                blk_mq_run_hw_queues(sdev->request_queue, true);
-               scsi_rescan_device(dev);
+               scsi_rescan_device(sdev);
        }
 
        return ret == 0 ? count : -EINVAL;
index 3c668cf..c92a317 100644 (file)
@@ -104,19 +104,7 @@ static void sd_config_discard(struct scsi_disk *, unsigned int);
 static void sd_config_write_same(struct scsi_disk *);
 static int  sd_revalidate_disk(struct gendisk *);
 static void sd_unlock_native_capacity(struct gendisk *disk);
-static int  sd_probe(struct device *);
-static int  sd_remove(struct device *);
 static void sd_shutdown(struct device *);
-static int sd_suspend_system(struct device *);
-static int sd_suspend_runtime(struct device *);
-static int sd_resume_system(struct device *);
-static int sd_resume_runtime(struct device *);
-static void sd_rescan(struct device *);
-static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt);
-static void sd_uninit_command(struct scsi_cmnd *SCpnt);
-static int sd_done(struct scsi_cmnd *);
-static void sd_eh_reset(struct scsi_cmnd *);
-static int sd_eh_action(struct scsi_cmnd *, int);
 static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
 static void scsi_disk_release(struct device *cdev);
 
@@ -592,33 +580,6 @@ static struct class sd_disk_class = {
        .dev_groups     = sd_disk_groups,
 };
 
-static const struct dev_pm_ops sd_pm_ops = {
-       .suspend                = sd_suspend_system,
-       .resume                 = sd_resume_system,
-       .poweroff               = sd_suspend_system,
-       .restore                = sd_resume_system,
-       .runtime_suspend        = sd_suspend_runtime,
-       .runtime_resume         = sd_resume_runtime,
-};
-
-static struct scsi_driver sd_template = {
-       .gendrv = {
-               .name           = "sd",
-               .owner          = THIS_MODULE,
-               .probe          = sd_probe,
-               .probe_type     = PROBE_PREFER_ASYNCHRONOUS,
-               .remove         = sd_remove,
-               .shutdown       = sd_shutdown,
-               .pm             = &sd_pm_ops,
-       },
-       .rescan                 = sd_rescan,
-       .init_command           = sd_init_command,
-       .uninit_command         = sd_uninit_command,
-       .done                   = sd_done,
-       .eh_action              = sd_eh_action,
-       .eh_reset               = sd_eh_reset,
-};
-
 /*
  * Don't request a new module, as that could deadlock in multipath
  * environment.
@@ -3929,6 +3890,33 @@ static int sd_resume_runtime(struct device *dev)
        return sd_resume(dev);
 }
 
+static const struct dev_pm_ops sd_pm_ops = {
+       .suspend                = sd_suspend_system,
+       .resume                 = sd_resume_system,
+       .poweroff               = sd_suspend_system,
+       .restore                = sd_resume_system,
+       .runtime_suspend        = sd_suspend_runtime,
+       .runtime_resume         = sd_resume_runtime,
+};
+
+static struct scsi_driver sd_template = {
+       .gendrv = {
+               .name           = "sd",
+               .owner          = THIS_MODULE,
+               .probe          = sd_probe,
+               .probe_type     = PROBE_PREFER_ASYNCHRONOUS,
+               .remove         = sd_remove,
+               .shutdown       = sd_shutdown,
+               .pm             = &sd_pm_ops,
+       },
+       .rescan                 = sd_rescan,
+       .init_command           = sd_init_command,
+       .uninit_command         = sd_uninit_command,
+       .done                   = sd_done,
+       .eh_action              = sd_eh_action,
+       .eh_reset               = sd_eh_reset,
+};
+
 /**
  *     init_sd - entry point for this driver (both when built in or when
  *     a module).
index e392eaf..0419401 100644 (file)
@@ -710,7 +710,7 @@ typedef u32 pqi_index_t;
 #define SOP_TMF_COMPLETE               0x0
 #define SOP_TMF_REJECTED               0x4
 #define SOP_TMF_FUNCTION_SUCCEEDED     0x8
-#define SOP_RC_INCORRECT_LOGICAL_UNIT  0x9
+#define SOP_TMF_INCORRECT_LOGICAL_UNIT 0x9
 
 /* additional CDB bytes usage field codes */
 #define SOP_ADDITIONAL_CDB_BYTES_0     0       /* 16-byte CDB */
@@ -1085,7 +1085,16 @@ struct pqi_stream_data {
        u32     last_accessed;
 };
 
-#define PQI_MAX_LUNS_PER_DEVICE         256
+#define PQI_MAX_LUNS_PER_DEVICE                256
+
+struct pqi_tmf_work {
+       struct work_struct work_struct;
+       struct scsi_cmnd *scmd;
+       struct pqi_ctrl_info *ctrl_info;
+       struct pqi_scsi_dev *device;
+       u8      lun;
+       u8      scsi_opcode;
+};
 
 struct pqi_scsi_dev {
        int     devtype;                /* as reported by INQUIRY command */
@@ -1111,6 +1120,7 @@ struct pqi_scsi_dev {
        u8      erase_in_progress : 1;
        bool    aio_enabled;            /* only valid for physical disks */
        bool    in_remove;
+       bool    in_reset[PQI_MAX_LUNS_PER_DEVICE];
        bool    device_offline;
        u8      vendor[8];              /* bytes 8-15 of inquiry data */
        u8      model[16];              /* bytes 16-31 of inquiry data */
@@ -1149,6 +1159,8 @@ struct pqi_scsi_dev {
        struct pqi_stream_data stream_data[NUM_STREAMS_PER_LUN];
        atomic_t scsi_cmds_outstanding[PQI_MAX_LUNS_PER_DEVICE];
        unsigned int raid_bypass_cnt;
+
+       struct pqi_tmf_work tmf_work[PQI_MAX_LUNS_PER_DEVICE];
 };
 
 /* VPD inquiry pages */
index 6aaaa7e..9a58df9 100644 (file)
 #define BUILD_TIMESTAMP
 #endif
 
-#define DRIVER_VERSION         "2.1.22-040"
+#define DRIVER_VERSION         "2.1.24-046"
 #define DRIVER_MAJOR           2
 #define DRIVER_MINOR           1
-#define DRIVER_RELEASE         22
-#define DRIVER_REVISION                40
+#define DRIVER_RELEASE         24
+#define DRIVER_REVISION                46
 
 #define DRIVER_NAME            "Microchip SmartPQI Driver (v" \
                                DRIVER_VERSION BUILD_TIMESTAMP ")"
@@ -48,6 +48,8 @@
 #define PQI_POST_RESET_DELAY_SECS                      5
 #define PQI_POST_OFA_RESET_DELAY_UPON_TIMEOUT_SECS     10
 
+#define PQI_NO_COMPLETION      ((void *)-1)
+
 MODULE_AUTHOR("Microchip");
 MODULE_DESCRIPTION("Driver for Microchip Smart Family Controller version "
        DRIVER_VERSION);
@@ -96,6 +98,7 @@ static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info);
 static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
        struct pqi_scsi_dev *device, u8 lun, unsigned long timeout_msecs);
 static void pqi_fail_all_outstanding_requests(struct pqi_ctrl_info *ctrl_info);
+static void pqi_tmf_worker(struct work_struct *work);
 
 /* for flags argument to pqi_submit_raid_request_synchronous() */
 #define PQI_SYNC_FLAGS_INTERRUPTABLE   0x1
@@ -455,6 +458,21 @@ static inline bool pqi_device_in_remove(struct pqi_scsi_dev *device)
        return device->in_remove;
 }
 
+static inline void pqi_device_reset_start(struct pqi_scsi_dev *device, u8 lun)
+{
+       device->in_reset[lun] = true;
+}
+
+static inline void pqi_device_reset_done(struct pqi_scsi_dev *device, u8 lun)
+{
+       device->in_reset[lun] = false;
+}
+
+static inline bool pqi_device_in_reset(struct pqi_scsi_dev *device, u8 lun)
+{
+       return device->in_reset[lun];
+}
+
 static inline int pqi_event_type_to_event_index(unsigned int event_type)
 {
        int index;
@@ -2137,6 +2155,15 @@ static inline bool pqi_is_device_added(struct pqi_scsi_dev *device)
        return device->sdev != NULL;
 }
 
+static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device)
+{
+       unsigned int lun;
+       struct pqi_tmf_work *tmf_work;
+
+       for (lun = 0, tmf_work = device->tmf_work; lun < PQI_MAX_LUNS_PER_DEVICE; lun++, tmf_work++)
+               INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker);
+}
+
 static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
        struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices)
 {
@@ -2217,6 +2244,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
                list_add_tail(&device->add_list_entry, &add_list);
                /* To prevent this device structure from being freed later. */
                device->keep_device = true;
+               pqi_init_device_tmf_work(device);
        }
 
        spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
@@ -2257,7 +2285,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
                        device->advertised_queue_depth = device->queue_depth;
                        scsi_change_queue_depth(device->sdev, device->advertised_queue_depth);
                        if (device->rescan) {
-                               scsi_rescan_device(&device->sdev->sdev_gendev);
+                               scsi_rescan_device(device->sdev);
                                device->rescan = false;
                        }
                }
@@ -3330,7 +3358,7 @@ static int pqi_interpret_task_management_response(struct pqi_ctrl_info *ctrl_inf
        case SOP_TMF_REJECTED:
                rc = -EAGAIN;
                break;
-       case SOP_RC_INCORRECT_LOGICAL_UNIT:
+       case SOP_TMF_INCORRECT_LOGICAL_UNIT:
                rc = -ENODEV;
                break;
        default:
@@ -5628,7 +5656,6 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info,
        int rc;
        struct pqi_io_request *io_request;
        struct pqi_aio_path_request *request;
-       struct pqi_scsi_dev *device;
 
        io_request = pqi_alloc_io_request(ctrl_info, scmd);
        if (!io_request)
@@ -5648,9 +5675,8 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info,
        request->command_priority = io_high_prio;
        put_unaligned_le16(io_request->index, &request->request_id);
        request->error_index = request->request_id;
-       device = scmd->device->hostdata;
-       if (!pqi_is_logical_device(device) && ctrl_info->multi_lun_device_supported)
-               put_unaligned_le64(((scmd->device->lun) << 8), &request->lun_number);
+       if (!raid_bypass && ctrl_info->multi_lun_device_supported)
+               put_unaligned_le64(scmd->device->lun << 8, &request->lun_number);
        if (cdb_length > sizeof(request->cdb))
                cdb_length = sizeof(request->cdb);
        request->cdb_length = cdb_length;
@@ -5850,6 +5876,7 @@ static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
 void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
 {
        struct pqi_scsi_dev *device;
+       struct completion *wait;
 
        if (!scmd->device) {
                set_host_byte(scmd, DID_NO_CONNECT);
@@ -5863,6 +5890,10 @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
        }
 
        atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
+
+       wait = (struct completion *)xchg(&scmd->host_scribble, NULL);
+       if (wait != PQI_NO_COMPLETION)
+               complete(wait);
 }
 
 static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info,
@@ -5948,6 +5979,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
        u16 hw_queue;
        struct pqi_queue_group *queue_group;
        bool raid_bypassed;
+       u8 lun;
+
+       scmd->host_scribble = PQI_NO_COMPLETION;
 
        device = scmd->device->hostdata;
 
@@ -5957,7 +5991,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
                return 0;
        }
 
-       atomic_inc(&device->scsi_cmds_outstanding[scmd->device->lun]);
+       lun = (u8)scmd->device->lun;
+
+       atomic_inc(&device->scsi_cmds_outstanding[lun]);
 
        ctrl_info = shost_to_hba(shost);
 
@@ -5967,7 +6003,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
                return 0;
        }
 
-       if (pqi_ctrl_blocked(ctrl_info)) {
+       if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device, lun)) {
                rc = SCSI_MLQUEUE_HOST_BUSY;
                goto out;
        }
@@ -6002,8 +6038,10 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
        }
 
 out:
-       if (rc)
-               atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
+       if (rc) {
+               scmd->host_scribble = NULL;
+               atomic_dec(&device->scsi_cmds_outstanding[lun]);
+       }
 
        return rc;
 }
@@ -6097,7 +6135,7 @@ static int pqi_wait_until_inbound_queues_empty(struct pqi_ctrl_info *ctrl_info)
 }
 
 static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
-       struct pqi_scsi_dev *device)
+       struct pqi_scsi_dev *device, u8 lun)
 {
        unsigned int i;
        unsigned int path;
@@ -6127,6 +6165,9 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
                                if (scsi_device != device)
                                        continue;
 
+                               if ((u8)scmd->device->lun != lun)
+                                       continue;
+
                                list_del(&io_request->request_list_entry);
                                set_host_byte(scmd, DID_RESET);
                                pqi_free_io_request(io_request);
@@ -6224,15 +6265,13 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
 
 #define PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS    30
 
-static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
        int rc;
        struct pqi_io_request *io_request;
        DECLARE_COMPLETION_ONSTACK(wait);
        struct pqi_task_management_request *request;
-       struct pqi_scsi_dev *device;
 
-       device = scmd->device->hostdata;
        io_request = pqi_alloc_io_request(ctrl_info, NULL);
        io_request->io_complete_callback = pqi_lun_reset_complete;
        io_request->context = &wait;
@@ -6247,7 +6286,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
        memcpy(request->lun_number, device->scsi3addr,
                sizeof(request->lun_number));
        if (!pqi_is_logical_device(device) && ctrl_info->multi_lun_device_supported)
-               request->ml_device_lun_number = (u8)scmd->device->lun;
+               request->ml_device_lun_number = lun;
        request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
        if (ctrl_info->tmf_iu_timeout_supported)
                put_unaligned_le16(PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS, &request->timeout);
@@ -6255,7 +6294,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
        pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH,
                io_request);
 
-       rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, (u8)scmd->device->lun, &wait);
+       rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, lun, &wait);
        if (rc == 0)
                rc = io_request->status;
 
@@ -6269,18 +6308,16 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
 #define PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS         (10 * 60 * 1000)
 #define PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS  (2 * 60 * 1000)
 
-static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
        int reset_rc;
        int wait_rc;
        unsigned int retries;
        unsigned long timeout_msecs;
-       struct pqi_scsi_dev *device;
 
-       device = scmd->device->hostdata;
        for (retries = 0;;) {
-               reset_rc = pqi_lun_reset(ctrl_info, scmd);
-               if (reset_rc == 0 || reset_rc == -ENODEV || ++retries > PQI_LUN_RESET_RETRIES)
+               reset_rc = pqi_lun_reset(ctrl_info, device, lun);
+               if (reset_rc == 0 || reset_rc == -ENODEV || reset_rc == -ENXIO || ++retries > PQI_LUN_RESET_RETRIES)
                        break;
                msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
        }
@@ -6288,60 +6325,51 @@ static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct sc
        timeout_msecs = reset_rc ? PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS :
                PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS;
 
-       wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, scmd->device->lun, timeout_msecs);
+       wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, lun, timeout_msecs);
        if (wait_rc && reset_rc == 0)
                reset_rc = wait_rc;
 
        return reset_rc == 0 ? SUCCESS : FAILED;
 }
 
-static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
        int rc;
-       struct pqi_scsi_dev *device;
 
-       device = scmd->device->hostdata;
        pqi_ctrl_block_requests(ctrl_info);
        pqi_ctrl_wait_until_quiesced(ctrl_info);
-       pqi_fail_io_queued_for_device(ctrl_info, device);
+       pqi_fail_io_queued_for_device(ctrl_info, device, lun);
        rc = pqi_wait_until_inbound_queues_empty(ctrl_info);
+       pqi_device_reset_start(device, lun);
+       pqi_ctrl_unblock_requests(ctrl_info);
        if (rc)
                rc = FAILED;
        else
-               rc = pqi_lun_reset_with_retries(ctrl_info, scmd);
-       pqi_ctrl_unblock_requests(ctrl_info);
+               rc = pqi_lun_reset_with_retries(ctrl_info, device, lun);
+       pqi_device_reset_done(device, lun);
 
        return rc;
 }
 
-static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
+static int pqi_device_reset_handler(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun, struct scsi_cmnd *scmd, u8 scsi_opcode)
 {
        int rc;
-       struct Scsi_Host *shost;
-       struct pqi_ctrl_info *ctrl_info;
-       struct pqi_scsi_dev *device;
-
-       shost = scmd->device->host;
-       ctrl_info = shost_to_hba(shost);
-       device = scmd->device->hostdata;
 
        mutex_lock(&ctrl_info->lun_reset_mutex);
 
        dev_err(&ctrl_info->pci_dev->dev,
-               "resetting scsi %d:%d:%d:%d due to cmd 0x%02x\n",
-               shost->host_no,
-               device->bus, device->target, (u32)scmd->device->lun,
-               scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff);
+               "resetting scsi %d:%d:%d:%u SCSI cmd at %p due to cmd opcode 0x%02x\n",
+               ctrl_info->scsi_host->host_no, device->bus, device->target, lun, scmd, scsi_opcode);
 
        pqi_check_ctrl_health(ctrl_info);
        if (pqi_ctrl_offline(ctrl_info))
                rc = FAILED;
        else
-               rc = pqi_device_reset(ctrl_info, scmd);
+               rc = pqi_device_reset(ctrl_info, device, lun);
 
        dev_err(&ctrl_info->pci_dev->dev,
-               "reset of scsi %d:%d:%d:%d: %s\n",
-               shost->host_no, device->bus, device->target, (u32)scmd->device->lun,
+               "reset of scsi %d:%d:%d:%u: %s\n",
+               ctrl_info->scsi_host->host_no, device->bus, device->target, lun,
                rc == SUCCESS ? "SUCCESS" : "FAILED");
 
        mutex_unlock(&ctrl_info->lun_reset_mutex);
@@ -6349,6 +6377,77 @@ static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
        return rc;
 }
 
+/*
+ * SCSI error-handler entry point for a device (LUN) reset.
+ *
+ * Extracts the controller, the driver's per-device data, the LUN and the
+ * opcode of @scmd, then forwards them to pqi_device_reset_handler() and
+ * returns its result. 0xff is passed as the opcode when the command carries
+ * no CDB bytes (cmd_len == 0).
+ */
+static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
+{
+       struct scsi_device *sdev = scmd->device;
+       struct pqi_ctrl_info *ctrl_info = shost_to_hba(sdev->host);
+       struct pqi_scsi_dev *device = sdev->hostdata;
+       u8 lun = (u8)sdev->lun;
+       u8 scsi_opcode = 0xff;
+
+       if (scmd->cmd_len > 0)
+               scsi_opcode = scmd->cmnd[0];
+
+       return pqi_device_reset_handler(ctrl_info, device, lun, scmd, scsi_opcode);
+}
+
+/*
+ * Work handler for a queued task-management request.
+ *
+ * Recovers the pqi_tmf_work that embeds @work and runs
+ * pqi_device_reset_handler() with the controller, device, LUN and opcode that
+ * were stored in it. The return value of the reset is not used here.
+ */
+static void pqi_tmf_worker(struct work_struct *work)
+{
+       struct pqi_tmf_work *tmf_work;
+       struct scsi_cmnd *scmd;
+
+       tmf_work = container_of(work, struct pqi_tmf_work, work_struct);
+       /*
+        * Take the command pointer and leave NULL behind in one exchange, so
+        * that the cmpxchg(&tmf_work->scmd, NULL, scmd) in
+        * pqi_eh_abort_handler() can claim this slot again.
+        */
+       scmd = (struct scsi_cmnd *)xchg(&tmf_work->scmd, NULL);
+
+       pqi_device_reset_handler(tmf_work->ctrl_info, tmf_work->device, tmf_work->lun, scmd, tmf_work->scsi_opcode);
+}
+
+/*
+ * SCSI error-handler entry point for aborting @scmd.
+ *
+ * Installs an on-stack completion in scmd->host_scribble, queues the per-LUN
+ * TMF work item (which performs a device reset via pqi_tmf_worker()) unless
+ * that slot is already claimed, and then blocks until the completion is
+ * signalled. Always returns SUCCESS.
+ */
+static int pqi_eh_abort_handler(struct scsi_cmnd *scmd)
+{
+       struct Scsi_Host *shost;
+       struct pqi_ctrl_info *ctrl_info;
+       struct pqi_scsi_dev *device;
+       struct pqi_tmf_work *tmf_work;
+       DECLARE_COMPLETION_ONSTACK(wait);
+
+       shost = scmd->device->host;
+       ctrl_info = shost_to_hba(shost);
+       device = scmd->device->hostdata;
+
+       dev_err(&ctrl_info->pci_dev->dev,
+               "attempting TASK ABORT on scsi %d:%d:%d:%d for SCSI cmd at %p\n",
+               shost->host_no, device->bus, device->target, (int)scmd->device->lun, scmd);
+
+       /*
+        * Replace the PQI_NO_COMPLETION marker with a pointer to our
+        * completion. If host_scribble already held NULL, the command is
+        * treated as completed and no abort is attempted.
+        * NOTE(review): PQI_NO_COMPLETION and the code that clears
+        * host_scribble are defined outside this hunk - confirm.
+        */
+       if (cmpxchg(&scmd->host_scribble, PQI_NO_COMPLETION, (void *)&wait) == NULL) {
+               dev_err(&ctrl_info->pci_dev->dev,
+                       "scsi %d:%d:%d:%d for SCSI cmd at %p already completed\n",
+                       shost->host_no, device->bus, device->target, (int)scmd->device->lun, scmd);
+               scmd->result = DID_RESET << 16;
+               goto out;
+       }
+
+       /* NOTE(review): the LUN is used as an index without a range check; the size of tmf_work[] is not visible here. */
+       tmf_work = &device->tmf_work[scmd->device->lun];
+
+       /* Only the caller that claims the free (NULL) slot fills in and schedules the work item. */
+       if (cmpxchg(&tmf_work->scmd, NULL, scmd) == NULL) {
+               tmf_work->ctrl_info = ctrl_info;
+               tmf_work->device = device;
+               tmf_work->lun = (u8)scmd->device->lun;
+               tmf_work->scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
+               schedule_work(&tmf_work->work_struct);
+       }
+
+       /* Untimed wait; presumably signalled from the command completion path via host_scribble - TODO confirm. */
+       wait_for_completion(&wait);
+
+       dev_err(&ctrl_info->pci_dev->dev,
+               "TASK ABORT on scsi %d:%d:%d:%d for SCSI cmd at %p: SUCCESS\n",
+               shost->host_no, device->bus, device->target, (int)scmd->device->lun, scmd);
+
+out:
+
+       return SUCCESS;
+}
+
 static int pqi_slave_alloc(struct scsi_device *sdev)
 {
        struct pqi_scsi_dev *device;
@@ -6470,21 +6569,21 @@ static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info, void __user *ar
        struct pci_dev *pci_dev;
        u32 subsystem_vendor;
        u32 subsystem_device;
-       cciss_pci_info_struct pciinfo;
+       cciss_pci_info_struct pci_info;
 
        if (!arg)
                return -EINVAL;
 
        pci_dev = ctrl_info->pci_dev;
 
-       pciinfo.domain = pci_domain_nr(pci_dev->bus);
-       pciinfo.bus = pci_dev->bus->number;
-       pciinfo.dev_fn = pci_dev->devfn;
+       pci_info.domain = pci_domain_nr(pci_dev->bus);
+       pci_info.bus = pci_dev->bus->number;
+       pci_info.dev_fn = pci_dev->devfn;
        subsystem_vendor = pci_dev->subsystem_vendor;
        subsystem_device = pci_dev->subsystem_device;
-       pciinfo.board_id = ((subsystem_device << 16) & 0xffff0000) | subsystem_vendor;
+       pci_info.board_id = ((subsystem_device << 16) & 0xffff0000) | subsystem_vendor;
 
-       if (copy_to_user(arg, &pciinfo, sizeof(pciinfo)))
+       if (copy_to_user(arg, &pci_info, sizeof(pci_info)))
                return -EFAULT;
 
        return 0;
@@ -7362,6 +7461,7 @@ static const struct scsi_host_template pqi_driver_template = {
        .scan_finished = pqi_scan_finished,
        .this_id = -1,
        .eh_device_reset_handler = pqi_eh_device_reset_handler,
+       .eh_abort_handler = pqi_eh_abort_handler,
        .ioctl = pqi_ioctl,
        .slave_alloc = pqi_slave_alloc,
        .slave_configure = pqi_slave_configure,
@@ -8904,6 +9004,52 @@ static void pqi_ctrl_offline_worker(struct work_struct *work)
        pqi_take_ctrl_offline_deferred(ctrl_info);
 }
 
+/*
+ * Translate a controller shutdown reason code into a short description for
+ * log messages. Codes without a dedicated entry yield "unknown reason".
+ * The returned pointer refers to a string literal and must not be modified.
+ */
+static char *pqi_ctrl_shutdown_reason_to_string(enum pqi_ctrl_shutdown_reason ctrl_shutdown_reason)
+{
+       switch (ctrl_shutdown_reason) {
+       case PQI_IQ_NOT_DRAINED_TIMEOUT:
+               return "inbound queue not drained timeout";
+       case PQI_LUN_RESET_TIMEOUT:
+               return "LUN reset timeout";
+       case PQI_IO_PENDING_POST_LUN_RESET_TIMEOUT:
+               return "I/O pending timeout after LUN reset";
+       case PQI_NO_HEARTBEAT:
+               return "no controller heartbeat detected";
+       case PQI_FIRMWARE_KERNEL_NOT_UP:
+               return "firmware kernel not ready";
+       case PQI_OFA_RESPONSE_TIMEOUT:
+               return "OFA response timeout";
+       case PQI_INVALID_REQ_ID:
+               return "invalid request ID";
+       case PQI_UNMATCHED_REQ_ID:
+               return "unmatched request ID";
+       case PQI_IO_PI_OUT_OF_RANGE:
+               return "I/O queue producer index out of range";
+       case PQI_EVENT_PI_OUT_OF_RANGE:
+               return "event queue producer index out of range";
+       case PQI_UNEXPECTED_IU_TYPE:
+               return "unexpected IU type";
+       default:
+               break;
+       }
+
+       return "unknown reason";
+}
+
 static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info,
        enum pqi_ctrl_shutdown_reason ctrl_shutdown_reason)
 {
@@ -8916,7 +9062,9 @@ static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info,
        if (!pqi_disable_ctrl_shutdown)
                sis_shutdown_ctrl(ctrl_info, ctrl_shutdown_reason);
        pci_disable_device(ctrl_info->pci_dev);
-       dev_err(&ctrl_info->pci_dev->dev, "controller offline\n");
+       dev_err(&ctrl_info->pci_dev->dev,
+               "controller offline: reason code 0x%x (%s)\n",
+               ctrl_shutdown_reason, pqi_ctrl_shutdown_reason_to_string(ctrl_shutdown_reason));
        schedule_work(&ctrl_info->ctrl_offline_work);
 }
 
@@ -9062,7 +9210,7 @@ static void pqi_shutdown(struct pci_dev *pci_dev)
        rc = pqi_flush_cache(ctrl_info, shutdown_event);
        if (rc)
                dev_err(&pci_dev->dev,
-                       "unable to flush controller cache\n");
+                       "unable to flush controller cache during shutdown\n");
 
        pqi_crash_if_pending_command(ctrl_info);
        pqi_reset(ctrl_info);
index 14d7981..338aa8c 100644 (file)
@@ -414,6 +414,8 @@ static int st_chk_result(struct scsi_tape *STp, struct st_request * SRpnt)
        if (cmdstatp->have_sense &&
            cmdstatp->sense_hdr.asc == 0 && cmdstatp->sense_hdr.ascq == 0x17)
                STp->cleaning_req = 1; /* ASC and ASCQ => cleaning requested */
+       if (cmdstatp->have_sense && scode == UNIT_ATTENTION && cmdstatp->sense_hdr.asc == 0x29)
+               STp->pos_unknown = 1; /* ASC => power on / reset */
 
        STp->pos_unknown |= STp->device->was_reset;
 
index 0686255..a95936b 100644 (file)
@@ -475,7 +475,7 @@ static void storvsc_device_scan(struct work_struct *work)
        sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
        if (!sdev)
                goto done;
-       scsi_rescan_device(&sdev->sdev_gendev);
+       scsi_rescan_device(sdev);
        scsi_device_put(sdev);
 
 done:
index bd56336..9d1bdcd 100644 (file)
@@ -325,7 +325,7 @@ static void virtscsi_handle_param_change(struct virtio_scsi *vscsi,
        /* Handle "Parameters changed", "Mode parameters changed", and
           "Capacity data has changed".  */
        if (asc == 0x2a && (ascq == 0x00 || ascq == 0x01 || ascq == 0x09))
-               scsi_rescan_device(&sdev->sdev_gendev);
+               scsi_rescan_device(sdev);
 
        scsi_device_put(sdev);
 }
index caae61a..9ec55dd 100644 (file)
@@ -743,7 +743,7 @@ static int scsifront_sdev_configure(struct scsi_device *sdev)
        if (info->host_active == STATE_ERROR)
                return -EIO;
 
-       if (info && current == info->curr) {
+       if (current == info->curr) {
                err = xenbus_printf(XBT_NIL, info->dev->nodename,
                              info->dev_state_path, "%d", XenbusStateConnected);
                if (err) {
@@ -761,7 +761,7 @@ static void scsifront_sdev_destroy(struct scsi_device *sdev)
        struct vscsifrnt_info *info = shost_priv(sdev->host);
        int err;
 
-       if (info && current == info->curr) {
+       if (current == info->curr) {
                err = xenbus_printf(XBT_NIL, info->dev->nodename,
                              info->dev_state_path, "%d", XenbusStateClosed);
                if (err)
@@ -903,7 +903,7 @@ static int scsifront_probe(struct xenbus_device *dev,
                xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
                return err;
        }
-       info = (struct vscsifrnt_info *)host->hostdata;
+       info = shost_priv(host);
 
        dev_set_drvdata(&dev->dev, info);
        info->dev = dev;
index de31589..5a75ab6 100644 (file)
@@ -334,6 +334,11 @@ if RISCV
 config ARCH_R9A07G043
        bool "RISC-V Platform support for RZ/Five"
        select ARCH_RZG2L
+       select AX45MP_L2_CACHE if RISCV_DMA_NONCOHERENT
+       select DMA_GLOBAL_POOL
+       select ERRATA_ANDES if RISCV_SBI
+       select ERRATA_ANDES_CMO if ERRATA_ANDES
+
        help
          This enables support for the Renesas RZ/Five SoC.
 
index 3f5b155..fddc633 100644 (file)
@@ -106,6 +106,7 @@ struct sun6i_spi {
        struct reset_control    *rstc;
 
        struct completion       done;
+       struct completion       dma_rx_done;
 
        const u8                *tx_buf;
        u8                      *rx_buf;
@@ -200,6 +201,13 @@ static size_t sun6i_spi_max_transfer_size(struct spi_device *spi)
        return SUN6I_MAX_XFER_SIZE - 1;
 }
 
+/*
+ * RX DMA descriptor callback. @param is the struct sun6i_spi that
+ * sun6i_spi_prepare_dma() stored in the descriptor's callback_param; signals
+ * dma_rx_done so sun6i_spi_transfer_one() can stop waiting for the RX DMA.
+ */
+static void sun6i_spi_dma_rx_cb(void *param)
+{
+       struct sun6i_spi *sspi = param;
+
+       complete(&sspi->dma_rx_done);
+}
+
 static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
                                 struct spi_transfer *tfr)
 {
@@ -211,7 +219,7 @@ static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
                struct dma_slave_config rxconf = {
                        .direction = DMA_DEV_TO_MEM,
                        .src_addr = sspi->dma_addr_rx,
-                       .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+                       .src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
                        .src_maxburst = 8,
                };
 
@@ -224,6 +232,8 @@ static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
                                                 DMA_PREP_INTERRUPT);
                if (!rxdesc)
                        return -EINVAL;
+               rxdesc->callback_param = sspi;
+               rxdesc->callback = sun6i_spi_dma_rx_cb;
        }
 
        txdesc = NULL;
@@ -279,6 +289,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
                return -EINVAL;
 
        reinit_completion(&sspi->done);
+       reinit_completion(&sspi->dma_rx_done);
        sspi->tx_buf = tfr->tx_buf;
        sspi->rx_buf = tfr->rx_buf;
        sspi->len = tfr->len;
@@ -479,6 +490,22 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
        start = jiffies;
        timeout = wait_for_completion_timeout(&sspi->done,
                                              msecs_to_jiffies(tx_time));
+
+       if (!use_dma) {
+               sun6i_spi_drain_fifo(sspi);
+       } else {
+               if (timeout && rx_len) {
+                       /*
+                        * Even though RX on the peripheral side has finished
+                        * RX DMA might still be in flight
+                        */
+                       timeout = wait_for_completion_timeout(&sspi->dma_rx_done,
+                                                             timeout);
+                       if (!timeout)
+                               dev_warn(&master->dev, "RX DMA timeout\n");
+               }
+       }
+
        end = jiffies;
        if (!timeout) {
                dev_warn(&master->dev,
@@ -506,7 +533,6 @@ static irqreturn_t sun6i_spi_handler(int irq, void *dev_id)
        /* Transfer complete */
        if (status & SUN6I_INT_CTL_TC) {
                sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
-               sun6i_spi_drain_fifo(sspi);
                complete(&sspi->done);
                return IRQ_HANDLED;
        }
@@ -665,6 +691,7 @@ static int sun6i_spi_probe(struct platform_device *pdev)
        }
 
        init_completion(&sspi->done);
+       init_completion(&sspi->dma_rx_done);
 
        sspi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
        if (IS_ERR(sspi->rstc)) {
index f569d37..57cc196 100644 (file)
@@ -266,7 +266,7 @@ static int gb_pwm_probe(struct gbphy_device *gbphy_dev,
 {
        struct gb_connection *connection;
        struct gb_pwm_chip *pwmc;
-       struct pwm_chip *pwm;
+       struct pwm_chip *chip;
        int ret;
 
        pwmc = kzalloc(sizeof(*pwmc), GFP_KERNEL);
@@ -294,13 +294,13 @@ static int gb_pwm_probe(struct gbphy_device *gbphy_dev,
        if (ret)
                goto exit_connection_disable;
 
-       pwm = &pwmc->chip;
+       chip = &pwmc->chip;
 
-       pwm->dev = &gbphy_dev->dev;
-       pwm->ops = &gb_pwm_ops;
-       pwm->npwm = pwmc->pwm_max + 1;
+       chip->dev = &gbphy_dev->dev;
+       chip->ops = &gb_pwm_ops;
+       chip->npwm = pwmc->pwm_max + 1;
 
-       ret = pwmchip_add(pwm);
+       ret = pwmchip_add(chip);
        if (ret) {
                dev_err(&gbphy_dev->dev,
                        "failed to register PWM: %d\n", ret);
index 9767159..abf5c72 100644 (file)
@@ -606,4 +606,4 @@ MODULE_DESCRIPTION("Spase SP8870 DVB-T Demodulator driver");
 MODULE_AUTHOR("Juergen Peitz");
 MODULE_LICENSE("GPL");
 
-EXPORT_SYMBOL(sp8870_attach);
+EXPORT_SYMBOL_GPL(sp8870_attach);
index 19a4b33..c81a00f 100644 (file)
@@ -510,4 +510,16 @@ config KHADAS_MCU_FAN_THERMAL
          If you say yes here you get support for the FAN controlled
          by the Microcontroller found on the Khadas VIM boards.
 
+config LOONGSON2_THERMAL
+       tristate "Loongson-2 SoC series thermal driver"
+       depends on LOONGARCH || COMPILE_TEST
+       depends on OF
+       help
+         Support for the thermal driver found on Loongson-2 SoC series
+         platforms. The driver implements the get_temp and set_trips
+         callbacks, which read the temperature of the current node and set
+         the temperature range that triggers the interrupt. The interrupt is
+         raised when the measured temperature rises above the high threshold
+         or falls below the low threshold.
+
 endif
index 058664b..c934cab 100644 (file)
@@ -63,3 +63,4 @@ obj-$(CONFIG_UNIPHIER_THERMAL)        += uniphier_thermal.o
 obj-$(CONFIG_AMLOGIC_THERMAL)     += amlogic_thermal.o
 obj-$(CONFIG_SPRD_THERMAL)     += sprd_thermal.o
 obj-$(CONFIG_KHADAS_MCU_FAN_THERMAL)   += khadas_mcu_fan.o
+obj-$(CONFIG_LOONGSON2_THERMAL)        += loongson2_thermal.o
index 9f6dc4f..f00765b 100644 (file)
@@ -876,8 +876,9 @@ static int armada_thermal_probe(struct platform_device *pdev)
                /* Wait the sensors to be valid */
                armada_wait_sensor_validity(priv);
 
-               tz = thermal_zone_device_register(priv->zone_name, 0, 0, priv,
-                                                 &legacy_ops, NULL, 0, 0);
+               tz = thermal_tripless_zone_device_register(priv->zone_name,
+                                                          priv, &legacy_ops,
+                                                          NULL);
                if (IS_ERR(tz)) {
                        dev_err(&pdev->dev,
                                "Failed to register thermal zone device\n");
index 0b73abd..9674e5f 100644 (file)
@@ -334,7 +334,6 @@ static int brcmstb_thermal_probe(struct platform_device *pdev)
                return PTR_ERR(priv->tmon_base);
 
        priv->dev = &pdev->dev;
-       platform_set_drvdata(pdev, priv);
        of_ops = priv->temp_params->of_ops;
 
        thermal = devm_thermal_of_zone_register(&pdev->dev, 0, priv,
index 7479158..9a29dfd 100644 (file)
@@ -91,7 +91,6 @@ static int sr_thermal_probe(struct platform_device *pdev)
 
                dev_dbg(dev, "thermal sensor %d registered\n", i);
        }
-       platform_set_drvdata(pdev, sr_thermal);
 
        return 0;
 }
index fca5c2c..576f88b 100644 (file)
@@ -229,7 +229,7 @@ MODULE_DEVICE_TABLE(of, db8500_thermal_match);
 static struct platform_driver db8500_thermal_driver = {
        .driver = {
                .name = "db8500-thermal",
-               .of_match_table = of_match_ptr(db8500_thermal_match),
+               .of_match_table = db8500_thermal_match,
        },
        .probe = db8500_thermal_probe,
        .suspend = db8500_thermal_suspend,
index 9954040..7a18cb9 100644 (file)
@@ -139,8 +139,8 @@ static int dove_thermal_probe(struct platform_device *pdev)
                return ret;
        }
 
-       thermal = thermal_zone_device_register("dove_thermal", 0, 0,
-                                              priv, &ops, NULL, 0, 0);
+       thermal = thermal_tripless_zone_device_register("dove_thermal", priv,
+                                                       &ops, NULL);
        if (IS_ERR(thermal)) {
                dev_err(&pdev->dev,
                        "Failed to register thermal zone device\n");
index e89b11b..14111cc 100644 (file)
@@ -178,10 +178,8 @@ static int imx8mm_tmu_probe_set_calib_v1(struct platform_device *pdev,
        int ret;
 
        ret = nvmem_cell_read_u32(&pdev->dev, "calib", &ana0);
-       if (ret) {
-               dev_warn(dev, "Failed to read OCOTP nvmem cell (%d).\n", ret);
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(dev, ret, "Failed to read OCOTP nvmem cell\n");
 
        writel(FIELD_PREP(TASR_BUF_VREF_MASK,
                          FIELD_GET(ANA0_BUF_VREF_MASK, ana0)) |
index ddd6008..ffc2871 100644 (file)
@@ -609,9 +609,9 @@ static int int3400_thermal_probe(struct platform_device *pdev)
 
        evaluate_odvp(priv);
 
-       priv->thermal = thermal_zone_device_register("INT3400 Thermal", 0, 0,
-                                               priv, &int3400_thermal_ops,
-                                               &int3400_thermal_params, 0, 0);
+       priv->thermal = thermal_tripless_zone_device_register("INT3400 Thermal", priv,
+                                                             &int3400_thermal_ops,
+                                                             &int3400_thermal_params);
        if (IS_ERR(priv->thermal)) {
                result = PTR_ERR(priv->thermal);
                goto free_art_trt;
index 68f59b3..4a918c1 100644 (file)
@@ -225,7 +225,6 @@ static int k3_bandgap_probe(struct platform_device *pdev)
                devm_thermal_add_hwmon_sysfs(dev, data[id].tzd);
        }
 
-       platform_set_drvdata(pdev, bgp);
 
        return 0;
 
index a5a0fc9..2fc799b 100644 (file)
@@ -502,8 +502,6 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
        writel(K3_VTM_ANYMAXT_OUTRG_ALERT_EN, data[0].bgp->cfg2_base +
               K3_VTM_MISC_CTRL_OFFSET);
 
-       platform_set_drvdata(pdev, bgp);
-
        print_look_up_table(dev, ref_table);
        /*
         * Now that the derived_table has the appropriate look up values
index 668747b..acb10d2 100644 (file)
@@ -71,8 +71,8 @@ static int kirkwood_thermal_probe(struct platform_device *pdev)
        if (IS_ERR(priv->sensor))
                return PTR_ERR(priv->sensor);
 
-       thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0,
-                                              priv, &ops, NULL, 0, 0);
+       thermal = thermal_tripless_zone_device_register("kirkwood_thermal",
+                                                       priv, &ops, NULL);
        if (IS_ERR(thermal)) {
                dev_err(&pdev->dev,
                        "Failed to register thermal zone device\n");
diff --git a/drivers/thermal/loongson2_thermal.c b/drivers/thermal/loongson2_thermal.c
new file mode 100644 (file)
index 0000000..133098d
--- /dev/null
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: zhanghongchen <zhanghongchen@loongson.cn>
+ *         Yinbo Zhu <zhuyinbo@loongson.cn>
+ * Copyright (C) 2022-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+#include <linux/units.h>
+#include "thermal_hwmon.h"
+
+#define LOONGSON2_MAX_SENSOR_SEL_NUM                   3
+
+#define LOONGSON2_THSENS_CTRL_HI_REG                   0x0
+#define LOONGSON2_THSENS_CTRL_LOW_REG                  0x8
+#define LOONGSON2_THSENS_STATUS_REG                    0x10
+#define LOONGSON2_THSENS_OUT_REG                       0x14
+
+#define LOONGSON2_THSENS_INT_LO                                BIT(0)
+#define LOONGSON2_THSENS_INT_HIGH                      BIT(1)
+#define LOONGSON2_THSENS_OUT_MASK                      0xFF
+
+/* Per-SoC configuration, attached to the OF match table entry as .data. */
+struct loongson2_thermal_chip_data {
+       /* sensor index; loongson2_thermal_set() offsets the threshold registers by twice this value */
+       unsigned int    thermal_sensor_sel;
+};
+
+/* Driver state; registered as the private data of the thermal zone. */
+struct loongson2_thermal_data {
+       /* register base, mapped in probe with devm_platform_ioremap_resource() */
+       void __iomem    *regs;
+       /* match data of the bound device, from device_get_match_data() */
+       const struct loongson2_thermal_chip_data *chip_data;
+};
+
+/*
+ * Write the low and high thresholds of the selected sensor.
+ *
+ * @low and @high are clamped, biased by HECTO (the inverse of the HECTO
+ * subtraction done in loongson2_thermal_get_temp()) and written as 16-bit
+ * values to the LOW/HI control registers; bit 0x100 is set in both when
+ * @enable is true. Always returns 0.
+ */
+static int loongson2_thermal_set(struct loongson2_thermal_data *data,
+                                       int low, int high, bool enable)
+{
+       int sensor_off = data->chip_data->thermal_sensor_sel * 2;
+       u64 ctrl_flags = enable ? 0x100 : 0;
+
+       /*
+        * NOTE(review): clamp() takes (val, lo, hi) but the constant is passed
+        * as val here; the calls are kept exactly as written.
+        */
+       low = clamp(-40, low, high);
+       high = clamp(125, low, high);
+
+       low += HECTO;
+       high += HECTO;
+
+       writew(low | ctrl_flags,
+              data->regs + LOONGSON2_THSENS_CTRL_LOW_REG + sensor_off);
+       writew(high | ctrl_flags,
+              data->regs + LOONGSON2_THSENS_CTRL_HI_REG + sensor_off);
+
+       return 0;
+}
+
+/*
+ * thermal_zone_device_ops.get_temp: read the output register, keep the bits
+ * selected by LOONGSON2_THSENS_OUT_MASK, subtract HECTO and scale by KILO.
+ * The result is stored in @temp; always returns 0.
+ */
+static int loongson2_thermal_get_temp(struct thermal_zone_device *tz, int *temp)
+{
+       struct loongson2_thermal_data *data = thermal_zone_device_priv(tz);
+       u32 raw = readl(data->regs + LOONGSON2_THSENS_OUT_REG);
+
+       raw &= LOONGSON2_THSENS_OUT_MASK;
+       *temp = (raw - HECTO) * KILO;
+
+       return 0;
+}
+
+/*
+ * Threaded interrupt handler. @dev is the thermal zone registered in probe.
+ * Writes both interrupt bits to the status register (presumably to clear the
+ * pending flags - confirm against the datasheet) and asks the thermal core to
+ * re-evaluate the zone.
+ */
+static irqreturn_t loongson2_thermal_irq_thread(int irq, void *dev)
+{
+       struct thermal_zone_device *tzd = dev;
+       struct loongson2_thermal_data *data = thermal_zone_device_priv(tzd);
+       void __iomem *status = data->regs + LOONGSON2_THSENS_STATUS_REG;
+
+       writeb(LOONGSON2_THSENS_INT_LO | LOONGSON2_THSENS_INT_HIGH, status);
+       thermal_zone_device_update(tzd, THERMAL_EVENT_UNSPECIFIED);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * thermal_zone_device_ops.set_trips: convert the @low/@high window handed in
+ * by the thermal core to the unit expected by loongson2_thermal_set() and
+ * enable the thresholds. Returns the result of loongson2_thermal_set().
+ */
+static int loongson2_thermal_set_trips(struct thermal_zone_device *tz, int low, int high)
+{
+       struct loongson2_thermal_data *data = thermal_zone_device_priv(tz);
+
+       /*
+        * MILLI is an unsigned long constant, so "low / MILLI" would convert a
+        * negative temperature to unsigned before dividing and produce a bogus
+        * value. Divide as int to keep the sign.
+        */
+       return loongson2_thermal_set(data, low / (int)MILLI, high / (int)MILLI, true);
+}
+
+static const struct thermal_zone_device_ops loongson2_of_thermal_ops = {
+       .get_temp = loongson2_thermal_get_temp,
+       .set_trips = loongson2_thermal_set_trips,
+};
+
+/*
+ * Platform driver probe.
+ *
+ * Maps the registers, fetches the interrupt, writes both interrupt bits to
+ * the status register and programs the thresholds with enable == false, then
+ * registers the first thermal zone that matches one of the sensor ids
+ * 0..LOONGSON2_MAX_SENSOR_SEL_NUM and installs the threaded IRQ handler.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int loongson2_thermal_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct loongson2_thermal_data *data;
+       struct thermal_zone_device *tzd;
+       int ret, irq, i;
+
+       data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       data->chip_data = device_get_match_data(dev);
+
+       data->regs = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(data->regs))
+               return PTR_ERR(data->regs);
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0)
+               return irq;
+
+       writeb(LOONGSON2_THSENS_INT_LO | LOONGSON2_THSENS_INT_HIGH, data->regs +
+               LOONGSON2_THSENS_STATUS_REG);
+
+       loongson2_thermal_set(data, 0, 0, false);
+
+       for (i = 0; i <= LOONGSON2_MAX_SENSOR_SEL_NUM; i++) {
+               tzd = devm_thermal_of_zone_register(dev, i, data,
+                       &loongson2_of_thermal_ops);
+
+               if (!IS_ERR(tzd))
+                       break;
+
+               /*
+                * PTR_ERR() yields a negative errno, so the comparison must be
+                * against -ENODEV. -ENODEV is taken to mean that no zone uses
+                * sensor id i (presumably - confirm against thermal_of), so the
+                * next id is tried; any other error aborts the probe.
+                */
+               if (PTR_ERR(tzd) == -ENODEV)
+                       continue;
+
+               return dev_err_probe(dev, PTR_ERR(tzd), "failed to register\n");
+       }
+
+       /* No id matched: never hand an ERR_PTR to the IRQ or hwmon code. */
+       if (IS_ERR(tzd))
+               return dev_err_probe(dev, PTR_ERR(tzd), "failed to register\n");
+
+       ret = devm_request_threaded_irq(dev, irq, NULL, loongson2_thermal_irq_thread,
+                       IRQF_ONESHOT, "loongson2_thermal", tzd);
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "failed to request alarm irq\n");
+
+       devm_thermal_add_hwmon_sysfs(dev, tzd);
+
+       return 0;
+}
+
+static const struct loongson2_thermal_chip_data loongson2_thermal_ls2k1000_data = {
+       .thermal_sensor_sel = 0,
+};
+
+static const struct of_device_id of_loongson2_thermal_match[] = {
+       {
+               .compatible = "loongson,ls2k1000-thermal",
+               .data = &loongson2_thermal_ls2k1000_data,
+       },
+       { /* end */ }
+};
+MODULE_DEVICE_TABLE(of, of_loongson2_thermal_match);
+
+static struct platform_driver loongson2_thermal_driver = {
+       .driver = {
+               .name           = "loongson2_thermal",
+               .of_match_table = of_loongson2_thermal_match,
+       },
+       .probe  = loongson2_thermal_probe,
+};
+module_platform_driver(loongson2_thermal_driver);
+
+MODULE_DESCRIPTION("Loongson2 thermal driver");
+MODULE_LICENSE("GPL");
index 61c7622..919b6ee 100644 (file)
@@ -139,8 +139,6 @@ static int max77620_thermal_probe(struct platform_device *pdev)
                return ret;
        }
 
-       platform_set_drvdata(pdev, mtherm);
-
        return 0;
 }
 
index c537aed..843214d 100644 (file)
@@ -1282,8 +1282,6 @@ static int mtk_thermal_probe(struct platform_device *pdev)
                        mtk_thermal_init_bank(mt, i, apmixed_phys_base,
                                              auxadc_phys_base, ctrl_id);
 
-       platform_set_drvdata(pdev, mt);
-
        tzdev = devm_thermal_of_zone_register(&pdev->dev, 0, mt,
                                              &mtk_thermal_ops);
        if (IS_ERR(tzdev))
index 054c965..effd9b0 100644 (file)
 #define LVTS_PROTTC(__base)            (__base + 0x00CC)
 #define LVTS_CLKEN(__base)             (__base + 0x00E4)
 
-#define LVTS_PERIOD_UNIT                       ((118 * 1000) / (256 * 38))
-#define LVTS_GROUP_INTERVAL                    1
-#define LVTS_FILTER_INTERVAL           1
-#define LVTS_SENSOR_INTERVAL           1
-#define LVTS_HW_FILTER                         0x2
+#define LVTS_PERIOD_UNIT                       0
+#define LVTS_GROUP_INTERVAL                    0
+#define LVTS_FILTER_INTERVAL           0
+#define LVTS_SENSOR_INTERVAL           0
+#define LVTS_HW_FILTER                         0x0
 #define LVTS_TSSEL_CONF                                0x13121110
 #define LVTS_CALSCALE_CONF                     0x300
-#define LVTS_MONINT_CONF                       0x9FBF7BDE
+#define LVTS_MONINT_CONF                       0x8300318C
+
+#define LVTS_MONINT_OFFSET_SENSOR0             0xC
+#define LVTS_MONINT_OFFSET_SENSOR1             0x180
+#define LVTS_MONINT_OFFSET_SENSOR2             0x3000
+#define LVTS_MONINT_OFFSET_SENSOR3             0x3000000
 
 #define LVTS_INT_SENSOR0                       0x0009001F
 #define LVTS_INT_SENSOR1                       0x001203E0
 #define LVTS_MSR_IMMEDIATE_MODE                0
 #define LVTS_MSR_FILTERED_MODE         1
 
+#define LVTS_MSR_READ_TIMEOUT_US       400
+#define LVTS_MSR_READ_WAIT_US          (LVTS_MSR_READ_TIMEOUT_US / 2)
+
 #define LVTS_HW_SHUTDOWN_MT8195                105000
 
+#define LVTS_MINIMUM_THRESHOLD         20000
+
 static int golden_temp = LVTS_GOLDEN_TEMP_DEFAULT;
 static int coeff_b = LVTS_COEFF_B;
 
@@ -110,6 +120,8 @@ struct lvts_sensor {
        void __iomem *base;
        int id;
        int dt_id;
+       int low_thresh;
+       int high_thresh;
 };
 
 struct lvts_ctrl {
@@ -119,6 +131,8 @@ struct lvts_ctrl {
        int num_lvts_sensor;
        int mode;
        void __iomem *base;
+       int low_thresh;
+       int high_thresh;
 };
 
 struct lvts_domain {
@@ -190,7 +204,7 @@ static int lvts_debugfs_init(struct device *dev, struct lvts_domain *lvts_td)
        int i;
 
        lvts_td->dom_dentry = debugfs_create_dir(dev_name(dev), NULL);
-       if (!lvts_td->dom_dentry)
+       if (IS_ERR(lvts_td->dom_dentry))
                return 0;
 
        for (i = 0; i < lvts_td->num_lvts_ctrl; i++) {
@@ -257,6 +271,7 @@ static int lvts_get_temp(struct thermal_zone_device *tz, int *temp)
        struct lvts_sensor *lvts_sensor = thermal_zone_device_priv(tz);
        void __iomem *msr = lvts_sensor->msr;
        u32 value;
+       int rc;
 
        /*
         * Measurement registers:
@@ -269,7 +284,8 @@ static int lvts_get_temp(struct thermal_zone_device *tz, int *temp)
         * 16   : Valid temperature
         * 15-0 : Raw temperature
         */
-       value = readl(msr);
+       rc = readl_poll_timeout(msr, value, value & BIT(16),
+                               LVTS_MSR_READ_WAIT_US, LVTS_MSR_READ_TIMEOUT_US);
 
        /*
         * As the thermal zone temperature will read before the
@@ -282,7 +298,7 @@ static int lvts_get_temp(struct thermal_zone_device *tz, int *temp)
         * functionning temperature and directly jump to a system
         * shutdown.
         */
-       if (!(value & BIT(16)))
+       if (rc)
                return -EAGAIN;
 
        *temp = lvts_raw_to_temp(value & 0xFFFF);
@@ -290,32 +306,84 @@ static int lvts_get_temp(struct thermal_zone_device *tz, int *temp)
        return 0;
 }
 
+static void lvts_update_irq_mask(struct lvts_ctrl *lvts_ctrl)
+{
+       u32 masks[] = {
+               LVTS_MONINT_OFFSET_SENSOR0,
+               LVTS_MONINT_OFFSET_SENSOR1,
+               LVTS_MONINT_OFFSET_SENSOR2,
+               LVTS_MONINT_OFFSET_SENSOR3,
+       };
+       u32 value = 0;
+       int i;
+
+       value = readl(LVTS_MONINT(lvts_ctrl->base));
+
+       for (i = 0; i < ARRAY_SIZE(masks); i++) {
+               if (lvts_ctrl->sensors[i].high_thresh == lvts_ctrl->high_thresh
+                   && lvts_ctrl->sensors[i].low_thresh == lvts_ctrl->low_thresh)
+                       value |= masks[i];
+               else
+                       value &= ~masks[i];
+       }
+
+       writel(value, LVTS_MONINT(lvts_ctrl->base));
+}
+
+static bool lvts_should_update_thresh(struct lvts_ctrl *lvts_ctrl, int high)
+{
+       int i;
+
+       if (high > lvts_ctrl->high_thresh)
+               return true;
+
+       for (i = 0; i < lvts_ctrl->num_lvts_sensor; i++)
+               if (lvts_ctrl->sensors[i].high_thresh == lvts_ctrl->high_thresh
+                   && lvts_ctrl->sensors[i].low_thresh == lvts_ctrl->low_thresh)
+                       return false;
+
+       return true;
+}
+
 static int lvts_set_trips(struct thermal_zone_device *tz, int low, int high)
 {
        struct lvts_sensor *lvts_sensor = thermal_zone_device_priv(tz);
+       struct lvts_ctrl *lvts_ctrl = container_of(lvts_sensor, struct lvts_ctrl, sensors[lvts_sensor->id]);
        void __iomem *base = lvts_sensor->base;
-       u32 raw_low = lvts_temp_to_raw(low);
+       u32 raw_low = lvts_temp_to_raw(low != -INT_MAX ? low : LVTS_MINIMUM_THRESHOLD);
        u32 raw_high = lvts_temp_to_raw(high);
+       bool should_update_thresh;
+
+       lvts_sensor->low_thresh = low;
+       lvts_sensor->high_thresh = high;
+
+       should_update_thresh = lvts_should_update_thresh(lvts_ctrl, high);
+       if (should_update_thresh) {
+               lvts_ctrl->high_thresh = high;
+               lvts_ctrl->low_thresh = low;
+       }
+       lvts_update_irq_mask(lvts_ctrl);
+
+       if (!should_update_thresh)
+               return 0;
 
        /*
-        * Hot to normal temperature threshold
+        * Low offset temperature threshold
         *
-        * LVTS_H2NTHRE
+        * LVTS_OFFSETL
         *
         * Bits:
         *
         * 14-0 : Raw temperature for threshold
         */
-       if (low != -INT_MAX) {
-               pr_debug("%s: Setting low limit temperature interrupt: %d\n",
-                        thermal_zone_device_type(tz), low);
-               writel(raw_low, LVTS_H2NTHRE(base));
-       }
+       pr_debug("%s: Setting low limit temperature interrupt: %d\n",
+                thermal_zone_device_type(tz), low);
+       writel(raw_low, LVTS_OFFSETL(base));
 
        /*
-        * Hot temperature threshold
+        * High offset temperature threshold
         *
-        * LVTS_HTHRE
+        * LVTS_OFFSETH
         *
         * Bits:
         *
@@ -323,7 +391,7 @@ static int lvts_set_trips(struct thermal_zone_device *tz, int low, int high)
         */
        pr_debug("%s: Setting high limit temperature interrupt: %d\n",
                 thermal_zone_device_type(tz), high);
-       writel(raw_high, LVTS_HTHRE(base));
+       writel(raw_high, LVTS_OFFSETH(base));
 
        return 0;
 }
@@ -451,7 +519,7 @@ static irqreturn_t lvts_irq_handler(int irq, void *data)
 
        for (i = 0; i < lvts_td->num_lvts_ctrl; i++) {
 
-               aux = lvts_ctrl_irq_handler(lvts_td->lvts_ctrl);
+               aux = lvts_ctrl_irq_handler(&lvts_td->lvts_ctrl[i]);
                if (aux != IRQ_HANDLED)
                        continue;
 
@@ -521,6 +589,9 @@ static int lvts_sensor_init(struct device *dev, struct lvts_ctrl *lvts_ctrl,
                 */
                lvts_sensor[i].msr = lvts_ctrl_data->mode == LVTS_MSR_IMMEDIATE_MODE ?
                        imm_regs[i] : msr_regs[i];
+
+               lvts_sensor[i].low_thresh = INT_MIN;
+               lvts_sensor[i].high_thresh = INT_MIN;
        };
 
        lvts_ctrl->num_lvts_sensor = lvts_ctrl_data->num_lvts_sensor;
@@ -688,6 +759,9 @@ static int lvts_ctrl_init(struct device *dev, struct lvts_domain *lvts_td,
                 */
                lvts_ctrl[i].hw_tshut_raw_temp =
                        lvts_temp_to_raw(lvts_data->lvts_ctrl[i].hw_tshut_temp);
+
+               lvts_ctrl[i].low_thresh = INT_MIN;
+               lvts_ctrl[i].high_thresh = INT_MIN;
        }
 
        /*
@@ -897,24 +971,6 @@ static int lvts_ctrl_configure(struct device *dev, struct lvts_ctrl *lvts_ctrl)
        writel(value, LVTS_MSRCTL0(lvts_ctrl->base));
 
        /*
-        * LVTS_MSRCTL1 : Measurement control
-        *
-        * Bits:
-        *
-        * 9: Ignore MSRCTL0 config and do immediate measurement on sensor3
-        * 6: Ignore MSRCTL0 config and do immediate measurement on sensor2
-        * 5: Ignore MSRCTL0 config and do immediate measurement on sensor1
-        * 4: Ignore MSRCTL0 config and do immediate measurement on sensor0
-        *
-        * That configuration will ignore the filtering and the delays
-        * introduced below in MONCTL1 and MONCTL2
-        */
-       if (lvts_ctrl->mode == LVTS_MSR_IMMEDIATE_MODE) {
-               value = BIT(9) | BIT(6) | BIT(5) | BIT(4);
-               writel(value, LVTS_MSRCTL1(lvts_ctrl->base));
-       }
-
-       /*
         * LVTS_MONCTL1 : Period unit and group interval configuration
         *
         * The clock source of LVTS thermal controller is 26MHz.
@@ -979,6 +1035,15 @@ static int lvts_ctrl_start(struct device *dev, struct lvts_ctrl *lvts_ctrl)
        struct thermal_zone_device *tz;
        u32 sensor_map = 0;
        int i;
+       /*
+        * Bitmaps to enable each sensor on immediate and filtered modes, as
+        * described in MSRCTL1 and MONCTL0 registers below, respectively.
+        */
+       u32 sensor_imm_bitmap[] = { BIT(4), BIT(5), BIT(6), BIT(9) };
+       u32 sensor_filt_bitmap[] = { BIT(0), BIT(1), BIT(2), BIT(3) };
+
+       u32 *sensor_bitmap = lvts_ctrl->mode == LVTS_MSR_IMMEDIATE_MODE ?
+                            sensor_imm_bitmap : sensor_filt_bitmap;
 
        for (i = 0; i < lvts_ctrl->num_lvts_sensor; i++) {
 
@@ -1016,20 +1081,38 @@ static int lvts_ctrl_start(struct device *dev, struct lvts_ctrl *lvts_ctrl)
                 * map, so we can enable the temperature monitoring in
                 * the hardware thermal controller.
                 */
-               sensor_map |= BIT(i);
+               sensor_map |= sensor_bitmap[i];
        }
 
        /*
-        * Bits:
-        *      9: Single point access flow
-        *    0-3: Enable sensing point 0-3
-        *
         * The initialization of the thermal zones give us
         * which sensor point to enable. If any thermal zone
         * was not described in the device tree, it won't be
         * enabled here in the sensor map.
         */
-       writel(sensor_map | BIT(9), LVTS_MONCTL0(lvts_ctrl->base));
+       if (lvts_ctrl->mode == LVTS_MSR_IMMEDIATE_MODE) {
+               /*
+                * LVTS_MSRCTL1 : Measurement control
+                *
+                * Bits:
+                *
+                * 9: Ignore MSRCTL0 config and do immediate measurement on sensor3
+                * 6: Ignore MSRCTL0 config and do immediate measurement on sensor2
+                * 5: Ignore MSRCTL0 config and do immediate measurement on sensor1
+                * 4: Ignore MSRCTL0 config and do immediate measurement on sensor0
+                *
+                * That configuration will ignore the filtering and the delays
+                * introduced in MONCTL1 and MONCTL2
+                */
+               writel(sensor_map, LVTS_MSRCTL1(lvts_ctrl->base));
+       } else {
+               /*
+                * Bits:
+                *      9: Single point access flow
+                *    0-3: Enable sensing point 0-3
+                */
+               writel(sensor_map | BIT(9), LVTS_MONCTL0(lvts_ctrl->base));
+       }
 
        return 0;
 }
@@ -1138,7 +1221,7 @@ static int lvts_probe(struct platform_device *pdev)
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
-               return dev_err_probe(dev, irq, "No irq resource\n");
+               return irq;
 
        ret = lvts_domain_init(dev, lvts_td, lvts_data);
        if (ret)
index a941b42..87c09f6 100644 (file)
@@ -23,7 +23,7 @@
 
 #define BIT_APPEND             0x3
 
-struct tsens_legacy_calibration_format tsens_8916_nvmem = {
+static struct tsens_legacy_calibration_format tsens_8916_nvmem = {
        .base_len = 7,
        .base_shift = 3,
        .sp_len = 5,
@@ -39,7 +39,7 @@ struct tsens_legacy_calibration_format tsens_8916_nvmem = {
        },
 };
 
-struct tsens_legacy_calibration_format tsens_8974_nvmem = {
+static struct tsens_legacy_calibration_format tsens_8974_nvmem = {
        .base_len = 8,
        .base_shift = 2,
        .sp_len = 6,
@@ -61,7 +61,7 @@ struct tsens_legacy_calibration_format tsens_8974_nvmem = {
        },
 };
 
-struct tsens_legacy_calibration_format tsens_8974_backup_nvmem = {
+static struct tsens_legacy_calibration_format tsens_8974_backup_nvmem = {
        .base_len = 8,
        .base_shift = 2,
        .sp_len = 6,
index 5132243..dc1c4ae 100644 (file)
@@ -21,7 +21,7 @@
 #define TM_HIGH_LOW_INT_STATUS_OFF             0x0088
 #define TM_HIGH_LOW_Sn_INT_THRESHOLD_OFF       0x0090
 
-struct tsens_legacy_calibration_format tsens_qcs404_nvmem = {
+static struct tsens_legacy_calibration_format tsens_qcs404_nvmem = {
        .base_len = 8,
        .base_shift = 2,
        .sp_len = 6,
index 58f4d8f..e5bc2c8 100644 (file)
@@ -887,7 +887,7 @@ static int exynos_map_dt_data(struct platform_device *pdev)
                return -EADDRNOTAVAIL;
        }
 
-       data->soc = (enum soc_type)of_device_get_match_data(&pdev->dev);
+       data->soc = (uintptr_t)of_device_get_match_data(&pdev->dev);
 
        switch (data->soc) {
        case SOC_ARCH_EXYNOS4210:
index 6e78616..96d9928 100644 (file)
@@ -122,8 +122,8 @@ static int spear_thermal_probe(struct platform_device *pdev)
        stdev->flags = val;
        writel_relaxed(stdev->flags, stdev->thermal_base);
 
-       spear_thermal = thermal_zone_device_register("spear_thermal", 0, 0,
-                               stdev, &ops, NULL, 0, 0);
+       spear_thermal = thermal_tripless_zone_device_register("spear_thermal",
+                                                             stdev, &ops, NULL);
        if (IS_ERR(spear_thermal)) {
                dev_err(&pdev->dev, "thermal zone device is NULL\n");
                ret = PTR_ERR(spear_thermal);
index cca16d6..f989b55 100644 (file)
@@ -56,8 +56,6 @@
 #define SUN50I_H6_THS_PC_TEMP_PERIOD(x)                ((GENMASK(19, 0) & (x)) << 12)
 #define SUN50I_H6_THS_DATA_IRQ_STS(x)          BIT(x)
 
-/* millidegree celsius */
-
 struct tsensor {
        struct ths_device               *tmdev;
        struct thermal_zone_device      *tzd;
@@ -286,7 +284,7 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev)
        size_t callen;
        int ret = 0;
 
-       calcell = devm_nvmem_cell_get(dev, "calibration");
+       calcell = nvmem_cell_get(dev, "calibration");
        if (IS_ERR(calcell)) {
                if (PTR_ERR(calcell) == -EPROBE_DEFER)
                        return -EPROBE_DEFER;
@@ -316,6 +314,8 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev)
 
        kfree(caldata);
 out:
+       if (!IS_ERR(calcell))
+               nvmem_cell_put(calcell);
        return ret;
 }
 
@@ -489,8 +489,6 @@ static int sun8i_ths_probe(struct platform_device *pdev)
        if (!tmdev->chip)
                return -EINVAL;
 
-       platform_set_drvdata(pdev, tmdev);
-
        ret = sun8i_ths_resource_init(tmdev);
        if (ret)
                return ret;
index a2879d6..4ffc3bb 100644 (file)
@@ -167,19 +167,69 @@ static int tegra_bpmp_thermal_get_num_zones(struct tegra_bpmp *bpmp,
        return 0;
 }
 
+static int tegra_bpmp_thermal_trips_supported(struct tegra_bpmp *bpmp, bool *supported)
+{
+       struct mrq_thermal_host_to_bpmp_request req;
+       union mrq_thermal_bpmp_to_host_response reply;
+       struct tegra_bpmp_message msg;
+       int err;
+
+       memset(&req, 0, sizeof(req));
+       req.type = CMD_THERMAL_QUERY_ABI;
+       req.query_abi.type = CMD_THERMAL_SET_TRIP;
+
+       memset(&msg, 0, sizeof(msg));
+       msg.mrq = MRQ_THERMAL;
+       msg.tx.data = &req;
+       msg.tx.size = sizeof(req);
+       msg.rx.data = &reply;
+       msg.rx.size = sizeof(reply);
+
+       err = tegra_bpmp_transfer(bpmp, &msg);
+       if (err)
+               return err;
+
+       if (msg.rx.ret == 0) {
+               *supported = true;
+               return 0;
+       } else if (msg.rx.ret == -BPMP_ENODEV) {
+               *supported = false;
+               return 0;
+       } else {
+               return -EINVAL;
+       }
+}
+
 static const struct thermal_zone_device_ops tegra_bpmp_of_thermal_ops = {
        .get_temp = tegra_bpmp_thermal_get_temp,
        .set_trips = tegra_bpmp_thermal_set_trips,
 };
 
+static const struct thermal_zone_device_ops tegra_bpmp_of_thermal_ops_notrips = {
+       .get_temp = tegra_bpmp_thermal_get_temp,
+};
+
 static int tegra_bpmp_thermal_probe(struct platform_device *pdev)
 {
        struct tegra_bpmp *bpmp = dev_get_drvdata(pdev->dev.parent);
+       const struct thermal_zone_device_ops *thermal_ops;
        struct tegra_bpmp_thermal *tegra;
        struct thermal_zone_device *tzd;
        unsigned int i, max_num_zones;
+       bool supported;
        int err;
 
+       err = tegra_bpmp_thermal_trips_supported(bpmp, &supported);
+       if (err) {
+               dev_err(&pdev->dev, "failed to determine if trip points are supported\n");
+               return err;
+       }
+
+       if (supported)
+               thermal_ops = &tegra_bpmp_of_thermal_ops;
+       else
+               thermal_ops = &tegra_bpmp_of_thermal_ops_notrips;
+
        tegra = devm_kzalloc(&pdev->dev, sizeof(*tegra), GFP_KERNEL);
        if (!tegra)
                return -ENOMEM;
@@ -222,7 +272,7 @@ static int tegra_bpmp_thermal_probe(struct platform_device *pdev)
                }
 
                tzd = devm_thermal_of_zone_register(
-                       &pdev->dev, i, zone, &tegra_bpmp_of_thermal_ops);
+                       &pdev->dev, i, zone, thermal_ops);
                if (IS_ERR(tzd)) {
                        if (PTR_ERR(tzd) == -EPROBE_DEFER)
                                return -EPROBE_DEFER;
index f4f1a04..1717e4a 100644 (file)
@@ -142,7 +142,6 @@ static int gadc_thermal_probe(struct platform_device *pdev)
                return ret;
 
        gti->dev = &pdev->dev;
-       platform_set_drvdata(pdev, gti);
 
        gti->tz_dev = devm_thermal_of_zone_register(&pdev->dev, 0, gti,
                                                    &gadc_thermal_ops);
index a597005..8717a33 100644 (file)
@@ -1266,7 +1266,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
                return ERR_PTR(-EINVAL);
        }
 
-       if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp) && !trips)
+       if (num_trips > 0 && !trips)
                return ERR_PTR(-EINVAL);
 
        if (!thermal_class)
@@ -1389,16 +1389,16 @@ free_tz:
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_register_with_trips);
 
-struct thermal_zone_device *thermal_zone_device_register(const char *type, int ntrips, int mask,
-                                                        void *devdata, struct thermal_zone_device_ops *ops,
-                                                        const struct thermal_zone_params *tzp, int passive_delay,
-                                                        int polling_delay)
+struct thermal_zone_device *thermal_tripless_zone_device_register(
+                                       const char *type,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp)
 {
-       return thermal_zone_device_register_with_trips(type, NULL, ntrips, mask,
-                                                      devdata, ops, tzp,
-                                                      passive_delay, polling_delay);
+       return thermal_zone_device_register_with_trips(type, NULL, 0, 0, devdata,
+                                                      ops, tzp, 0, 0);
 }
-EXPORT_SYMBOL_GPL(thermal_zone_device_register);
+EXPORT_SYMBOL_GPL(thermal_tripless_zone_device_register);
 
 void *thermal_zone_device_priv(struct thermal_zone_device *tzd)
 {
index 04513f9..de884be 100644 (file)
@@ -70,7 +70,7 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
 void thermal_cdev_update(struct thermal_cooling_device *);
 void __thermal_cdev_update(struct thermal_cooling_device *cdev);
 
-int get_tz_trend(struct thermal_zone_device *tz, int trip);
+int get_tz_trend(struct thermal_zone_device *tz, int trip_index);
 
 struct thermal_instance *
 get_thermal_instance(struct thermal_zone_device *tz,
index cfba096..4d66372 100644 (file)
@@ -22,8 +22,9 @@
 #include "thermal_core.h"
 #include "thermal_trace.h"
 
-int get_tz_trend(struct thermal_zone_device *tz, int trip)
+int get_tz_trend(struct thermal_zone_device *tz, int trip_index)
 {
+       struct thermal_trip *trip = tz->trips ? &tz->trips[trip_index] : NULL;
        enum thermal_trend trend;
 
        if (tz->emul_temperature || !tz->ops->get_trend ||
index 53115cf..024e2e3 100644 (file)
@@ -101,29 +101,11 @@ void __thermal_zone_set_trips(struct thermal_zone_device *tz)
 int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
                            struct thermal_trip *trip)
 {
-       int ret;
-
-       if (!tz || trip_id < 0 || trip_id >= tz->num_trips || !trip)
+       if (!tz || !tz->trips || trip_id < 0 || trip_id >= tz->num_trips || !trip)
                return -EINVAL;
 
-       if (tz->trips) {
-               *trip = tz->trips[trip_id];
-               return 0;
-       }
-
-       if (tz->ops->get_trip_hyst) {
-               ret = tz->ops->get_trip_hyst(tz, trip_id, &trip->hysteresis);
-               if (ret)
-                       return ret;
-       } else {
-               trip->hysteresis = 0;
-       }
-
-       ret = tz->ops->get_trip_temp(tz, trip_id, &trip->temperature);
-       if (ret)
-               return ret;
-
-       return tz->ops->get_trip_type(tz, trip_id, &trip->type);
+       *trip = tz->trips[trip_id];
+       return 0;
 }
 EXPORT_SYMBOL_GPL(__thermal_zone_get_trip);
 
index a1c9a15..0c2eb9c 100644 (file)
@@ -314,7 +314,7 @@ int ti_bandgap_adc_to_mcelsius(struct ti_bandgap *bgp, int adc_val, int *t)
  */
 static inline int ti_bandgap_validate(struct ti_bandgap *bgp, int id)
 {
-       if (!bgp || IS_ERR(bgp)) {
+       if (IS_ERR_OR_NULL(bgp)) {
                pr_err("%s: invalid bandgap pointer\n", __func__);
                return -EINVAL;
        }
index d414a4b..6ba2613 100644 (file)
@@ -109,7 +109,8 @@ static inline int __ti_thermal_get_temp(struct thermal_zone_device *tz, int *tem
        return ret;
 }
 
-static int __ti_thermal_get_trend(struct thermal_zone_device *tz, int trip, enum thermal_trend *trend)
+static int __ti_thermal_get_trend(struct thermal_zone_device *tz,
+                                 struct thermal_trip *trip, enum thermal_trend *trend)
 {
        struct ti_thermal_data *data = thermal_zone_device_priv(tz);
        struct ti_bandgap *bgp;
index 34e4239..374e5aa 100644 (file)
@@ -76,8 +76,7 @@ static int ufs_bsg_exec_advanced_rpmb_req(struct ufs_hba *hba, struct bsg_job *j
        int ret;
        int data_len;
 
-       if (hba->ufs_version < ufshci_version(4, 0) || !hba->dev_info.b_advanced_rpmb_en ||
-           !(hba->capabilities & MASK_EHSLUTRD_SUPPORTED))
+       if (hba->ufs_version < ufshci_version(4, 0) || !hba->dev_info.b_advanced_rpmb_en)
                return -EINVAL;
 
        if (rpmb_request->ehs_req.length != 2 || rpmb_request->ehs_req.ehs_type != 1)
index e431817..9341751 100644 (file)
@@ -7240,11 +7240,17 @@ int ufshcd_advanced_rpmb_req_handler(struct ufs_hba *hba, struct utp_upiu_req *r
        /* Advanced RPMB starts from UFS 4.0, so its command type is UTP_CMD_TYPE_UFS_STORAGE */
        lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE;
 
-       ufshcd_prepare_req_desc_hdr(lrbp, &upiu_flags, dir, 2);
+       /*
+        * According to UFSHCI 4.0 specification page 24, if EHSLUTRDS is 0, host controller takes
+        * EHS length from CMD UPIU, and SW driver use EHS Length field in CMD UPIU. if it is 1,
+        * HW controller takes EHS length from UTRD.
+        */
+       if (hba->capabilities & MASK_EHSLUTRD_SUPPORTED)
+               ufshcd_prepare_req_desc_hdr(lrbp, &upiu_flags, dir, 2);
+       else
+               ufshcd_prepare_req_desc_hdr(lrbp, &upiu_flags, dir, 0);
 
-       /* update the task tag and LUN in the request upiu */
-       req_upiu->header.flags = upiu_flags;
-       req_upiu->header.lun = UFS_UPIU_RPMB_WLUN;
+       /* update the task tag */
        req_upiu->header.task_tag = tag;
 
        /* copy the UPIU(contains CDB) request as it is */
index ff01f2c..6010135 100644 (file)
@@ -13,7 +13,9 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 /*
  * USB Control Register
index d3bea42..d28c30b 100644 (file)
@@ -87,8 +87,7 @@ static int gpio_backlight_probe(struct platform_device *pdev)
                /* Not booted with device tree or no phandle link to the node */
                bl->props.power = def_value ? FB_BLANK_UNBLANK
                                            : FB_BLANK_POWERDOWN;
-       else if (gpiod_get_direction(gbl->gpiod) == 0 &&
-                gpiod_get_value_cansleep(gbl->gpiod) == 0)
+       else if (gpiod_get_value_cansleep(gbl->gpiod) == 0)
                bl->props.power = FB_BLANK_POWERDOWN;
        else
                bl->props.power = FB_BLANK_UNBLANK;
index 3259292..032f8bd 100644 (file)
@@ -243,7 +243,7 @@ MODULE_DEVICE_TABLE(of, led_bl_of_match);
 static struct platform_driver led_bl_driver = {
        .driver         = {
                .name           = "led-backlight",
-               .of_match_table = of_match_ptr(led_bl_of_match),
+               .of_match_table = led_bl_of_match,
        },
        .probe          = led_bl_probe,
        .remove_new     = led_bl_remove,
index 1c9e921..da1f124 100644 (file)
@@ -71,6 +71,7 @@ struct lp855x {
        struct device *dev;
        struct lp855x_platform_data *pdata;
        struct pwm_device *pwm;
+       bool needs_pwm_init;
        struct regulator *supply;       /* regulator for VDD input */
        struct regulator *enable;       /* regulator for EN/VDDIO input */
 };
@@ -216,16 +217,24 @@ err:
        return ret;
 }
 
-static void lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
+static int lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
 {
        struct pwm_state state;
 
-       pwm_get_state(lp->pwm, &state);
+       if (lp->needs_pwm_init) {
+               pwm_init_state(lp->pwm, &state);
+               /* Legacy platform data compatibility */
+               if (lp->pdata->period_ns > 0)
+                       state.period = lp->pdata->period_ns;
+               lp->needs_pwm_init = false;
+       } else {
+               pwm_get_state(lp->pwm, &state);
+       }
 
        state.duty_cycle = div_u64(br * state.period, max_br);
        state.enabled = state.duty_cycle;
 
-       pwm_apply_state(lp->pwm, &state);
+       return pwm_apply_state(lp->pwm, &state);
 }
 
 static int lp855x_bl_update_status(struct backlight_device *bl)
@@ -237,11 +246,12 @@ static int lp855x_bl_update_status(struct backlight_device *bl)
                brightness = 0;
 
        if (lp->mode == PWM_BASED)
-               lp855x_pwm_ctrl(lp, brightness, bl->props.max_brightness);
+               return lp855x_pwm_ctrl(lp, brightness,
+                                     bl->props.max_brightness);
        else if (lp->mode == REGISTER_BASED)
-               lp855x_write_byte(lp, lp->cfg->reg_brightness, (u8)brightness);
-
-       return 0;
+               return lp855x_write_byte(lp, lp->cfg->reg_brightness,
+                                       (u8)brightness);
+       return -EINVAL;
 }
 
 static const struct backlight_ops lp855x_bl_ops = {
@@ -387,7 +397,6 @@ static int lp855x_probe(struct i2c_client *cl)
        const struct i2c_device_id *id = i2c_client_get_device_id(cl);
        const struct acpi_device_id *acpi_id = NULL;
        struct device *dev = &cl->dev;
-       struct pwm_state pwmstate;
        struct lp855x *lp;
        int ret;
 
@@ -470,15 +479,11 @@ static int lp855x_probe(struct i2c_client *cl)
                else
                        return dev_err_probe(dev, ret, "getting PWM\n");
 
+               lp->needs_pwm_init = false;
                lp->mode = REGISTER_BASED;
                dev_dbg(dev, "mode: register based\n");
        } else {
-               pwm_init_state(lp->pwm, &pwmstate);
-               /* Legacy platform data compatibility */
-               if (lp->pdata->period_ns > 0)
-                       pwmstate.period = lp->pdata->period_ns;
-               pwm_apply_state(lp->pwm, &pwmstate);
-
+               lp->needs_pwm_init = true;
                lp->mode = PWM_BASED;
                dev_dbg(dev, "mode: PWM based\n");
        }
index c6996aa..1012909 100644 (file)
@@ -9,8 +9,8 @@
 #include <linux/backlight.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_address.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 
 /* From DT binding */
index 0cbfb49..7514589 100644 (file)
@@ -307,7 +307,7 @@ config XILINX_WATCHDOG
 config XILINX_WINDOW_WATCHDOG
        tristate "Xilinx window watchdog timer"
        depends on HAS_IOMEM
-       depends on ARM64
+       depends on ARM64 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Window watchdog driver for the versal_wwdt IP core.
@@ -343,7 +343,7 @@ config RAVE_SP_WATCHDOG
 
 config MLX_WDT
        tristate "Mellanox Watchdog"
-       depends on MELLANOX_PLATFORM
+       depends on MELLANOX_PLATFORM || COMPILE_TEST
        select WATCHDOG_CORE
        select REGMAP
        help
@@ -493,7 +493,7 @@ config FTWDT010_WATCHDOG
 
 config IXP4XX_WATCHDOG
        tristate "IXP4xx Watchdog"
-       depends on ARCH_IXP4XX
+       depends on ARCH_IXP4XX || (ARM && COMPILE_TEST)
        select WATCHDOG_CORE
        help
          Say Y here if to include support for the watchdog timer
@@ -529,7 +529,7 @@ config S3C2410_WATCHDOG
 
 config SA1100_WATCHDOG
        tristate "SA1100/PXA2xx watchdog"
-       depends on ARCH_SA1100 || ARCH_PXA
+       depends on ARCH_SA1100 || ARCH_PXA || COMPILE_TEST
        help
          Watchdog timer embedded into SA11x0 and PXA2xx chips. This will
          reboot your system when timeout is reached.
@@ -720,7 +720,7 @@ config IMX2_WDT
 config IMX_SC_WDT
        tristate "IMX SC Watchdog"
        depends on HAVE_ARM_SMCCC
-       depends on IMX_SCU
+       depends on IMX_SCU || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the system controller watchdog
@@ -931,7 +931,7 @@ config ASPEED_WATCHDOG
 
 config STM32_WATCHDOG
        tristate "STM32 Independent WatchDoG (IWDG) support"
-       depends on ARCH_STM32
+       depends on ARCH_STM32 || COMPILE_TEST
        select WATCHDOG_CORE
        default y
        help
@@ -1065,7 +1065,7 @@ config ACQUIRE_WDT
 
 config ADVANTECH_WDT
        tristate "Advantech SBC Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          If you are configuring a Linux kernel for the Advantech single-board
          computer, say `Y' here to support its built-in watchdog timer
@@ -1074,14 +1074,16 @@ config ADVANTECH_WDT
 
 config ADVANTECH_EC_WDT
        tristate "Advantech Embedded Controller Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
+       select ISA_BUS_API
+       select WATCHDOG_CORE
        help
                This driver supports Advantech products with ITE based Embedded Controller.
                It does not support Advantech products with other ECs or without EC.
 
 config ALIM1535_WDT
        tristate "ALi M1535 PMU Watchdog Timer"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        help
          This is the driver for the hardware watchdog on the ALi M1535 PMU.
 
@@ -1105,7 +1107,7 @@ config ALIM7101_WDT
 
 config EBC_C384_WDT
        tristate "WinSystems EBC-C384 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select ISA_BUS_API
        select WATCHDOG_CORE
        help
@@ -1115,7 +1117,7 @@ config EBC_C384_WDT
 
 config EXAR_WDT
        tristate "Exar Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Enables watchdog timer support for the watchdog timer present
@@ -1126,7 +1128,7 @@ config EXAR_WDT
 
 config F71808E_WDT
        tristate "Fintek F718xx, F818xx Super I/O Watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog on the Fintek F71808E,
@@ -1138,7 +1140,7 @@ config F71808E_WDT
 
 config SP5100_TCO
        tristate "AMD/ATI SP5100 TCO Timer/Watchdog"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        select WATCHDOG_CORE
        help
          Hardware watchdog driver for the AMD/ATI SP5100 chipset. The TCO
@@ -1177,7 +1179,7 @@ config SC520_WDT
 
 config SBC_FITPC2_WATCHDOG
        tristate "Compulab SBC-FITPC2 watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the built-in watchdog timer on the fit-PC2,
          fit-PC2i, CM-iAM single-board computers made by Compulab.
@@ -1202,7 +1204,7 @@ config SBC_FITPC2_WATCHDOG
 
 config EUROTECH_WDT
        tristate "Eurotech CPU-1220/1410 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          Enable support for the watchdog timer on the Eurotech CPU-1220 and
          CPU-1410 cards.  These are PC/104 SBCs. Spec sheets and product
@@ -1210,7 +1212,7 @@ config EUROTECH_WDT
 
 config IB700_WDT
        tristate "IB700 SBC Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog on the IB700 Single
          Board Computer produced by TMC Technology (www.tmc-uk.com). This
@@ -1227,7 +1229,7 @@ config IB700_WDT
 
 config IBMASR
        tristate "IBM Automatic Server Restart"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the IBM Automatic Server Restart watchdog
          timer built-in into some eServer xSeries machines.
@@ -1237,7 +1239,7 @@ config IBMASR
 
 config WAFER_WDT
        tristate "ICP Single Board Computer Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is a driver for the hardware watchdog on the ICP Single
          Board Computer. This driver is working on (at least) the following
@@ -1259,7 +1261,7 @@ config I6300ESB_WDT
 
 config IE6XX_WDT
        tristate "Intel Atom E6xx Watchdog"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        select WATCHDOG_CORE
        select MFD_CORE
        select LPC_SCH
@@ -1319,7 +1321,7 @@ config ITCO_VENDOR_SUPPORT
 
 config IT8712F_WDT
        tristate "IT8712F (Smart Guardian) Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the built-in watchdog timer on the IT8712F
          Super I/0 chipset used on many motherboards.
@@ -1332,7 +1334,7 @@ config IT8712F_WDT
 
 config IT87_WDT
        tristate "IT87 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog on the ITE IT8607,
@@ -1350,7 +1352,7 @@ config IT87_WDT
 config HP_WATCHDOG
        tristate "HP ProLiant iLO2+ Hardware Watchdog Timer"
        select WATCHDOG_CORE
-       depends on (ARM64 || X86) && PCI
+       depends on (ARM64 || X86 || COMPILE_TEST) && PCI
        help
          A software monitoring watchdog and NMI handling driver. This driver
          will detect lockups and provide a stack trace. This is a driver that
@@ -1380,7 +1382,7 @@ config KEMPLD_WDT
 
 config SC1200_WDT
        tristate "National Semiconductor PC87307/PC97307 (ala SC1200) Watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is a driver for National Semiconductor PC87307/PC97307 hardware
          watchdog cards as found on the SC1200. This watchdog is mainly used
@@ -1403,7 +1405,7 @@ config SCx200_WDT
 
 config PC87413_WDT
        tristate "NS PC87413 watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog on the PC87413 chipset
          This watchdog simply watches your kernel to make sure it doesn't
@@ -1417,7 +1419,7 @@ config PC87413_WDT
 
 config NV_TCO
        tristate "nVidia TCO Timer/Watchdog"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        help
          Hardware driver for the TCO timer built into the nVidia Hub family
          (such as the MCP51).  The TCO (Total Cost of Ownership) timer is a
@@ -1446,7 +1448,7 @@ config RDC321X_WDT
 
 config 60XX_WDT
        tristate "SBC-60XX Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This driver can be used with the watchdog timer found on some
          single board computers, namely the 6010 PII based computer.
@@ -1486,7 +1488,7 @@ config SBC7240_WDT
 
 config CPU5_WDT
        tristate "SMA CPU5 Watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          TBD.
          To compile this driver as a module, choose M here: the
@@ -1494,7 +1496,7 @@ config CPU5_WDT
 
 config SMSC_SCH311X_WDT
        tristate "SMSC SCH311X Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog timer on the
          SMSC SCH3112, SCH3114 and SCH3116 Super IO chipset
@@ -1506,7 +1508,7 @@ config SMSC_SCH311X_WDT
 
 config SMSC37B787_WDT
        tristate "Winbond SMsC37B787 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog component on the
          Winbond SMsC37B787 chipset as used on the NetRunner Mainboard
@@ -1526,7 +1528,7 @@ config SMSC37B787_WDT
 
 config TQMX86_WDT
        tristate "TQ-Systems TQMX86 Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog timer in the TQMX86 IO
@@ -1539,7 +1541,7 @@ config TQMX86_WDT
 
 config VIA_WDT
        tristate "VIA Watchdog Timer"
-       depends on X86 && PCI
+       depends on (X86 || COMPILE_TEST) && PCI
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog timer on VIA
@@ -1552,7 +1554,7 @@ config VIA_WDT
 
 config W83627HF_WDT
        tristate "Watchdog timer for W83627HF/W83627DHG and compatibles"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          This is the driver for the hardware watchdog on the following
@@ -1582,7 +1584,7 @@ config W83627HF_WDT
 
 config W83877F_WDT
        tristate "W83877F (EMACS) Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog on the W83877F chipset
          as used in EMACS PC-104 motherboards (and likely others).  This
@@ -1597,7 +1599,7 @@ config W83877F_WDT
 
 config W83977F_WDT
        tristate "W83977F (PCM-5335) Watchdog Timer"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the hardware watchdog on the W83977F I/O chip
          as used in AAEON's PCM-5335 SBC (and likely others).  This
@@ -1610,7 +1612,7 @@ config W83977F_WDT
 
 config MACHZ_WDT
        tristate "ZF MachZ Watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          If you are using a ZF Micro MachZ processor, say Y here, otherwise
          N.  This is the driver for the watchdog timer built-in on that
@@ -1623,7 +1625,7 @@ config MACHZ_WDT
 
 config SBC_EPX_C3_WATCHDOG
        tristate "Winsystems SBC EPX-C3 watchdog"
-       depends on X86
+       depends on X86 || COMPILE_TEST
        help
          This is the driver for the built-in watchdog timer on the EPX-C3
          Single-board computer made by Winsystems, Inc.
@@ -1739,7 +1741,7 @@ config INDYDOG
 
 config JZ4740_WDT
        tristate "Ingenic jz4740 SoC hardware watchdog"
-       depends on MIPS
+       depends on MIPS || COMPILE_TEST
        depends on COMMON_CLK
        select WATCHDOG_CORE
        select MFD_SYSCON
@@ -1798,6 +1800,19 @@ config OCTEON_WDT
          from the first interrupt, it is then only poked when the
          device is written.
 
+config MARVELL_GTI_WDT
+       tristate "Marvell GTI Watchdog driver"
+       depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
+       default y
+       select WATCHDOG_CORE
+       help
+         Marvell GTI hardware supports watchdog timer. First timeout
+         works as watchdog pretimeout and installed interrupt handler
+         will be called on first timeout. Hardware can generate interrupt
+         to SCP on second timeout but it is not enabled, so second
+         timeout is ignored. If device poke does not happen then system
+         will reboot on third timeout.
+
 config BCM2835_WDT
        tristate "Broadcom BCM2835 hardware watchdog"
        depends on ARCH_BCM2835 || (OF && COMPILE_TEST)
@@ -1823,7 +1838,7 @@ config BCM_KONA_WDT
 
 config BCM_KONA_WDT_DEBUG
        bool "DEBUGFS support for BCM Kona Watchdog"
-       depends on BCM_KONA_WDT
+       depends on BCM_KONA_WDT || COMPILE_TEST
        help
          If enabled, adds /sys/kernel/debug/bcm_kona_wdt/info which provides
          access to the driver's internal data structures as well as watchdog
@@ -1864,7 +1879,7 @@ config LANTIQ_WDT
 
 config LOONGSON1_WDT
        tristate "Loongson1 SoC hardware watchdog"
-       depends on MACH_LOONGSON32
+       depends on MACH_LOONGSON32 || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Hardware driver for the Loongson1 SoC Watchdog Timer.
@@ -1878,7 +1893,7 @@ config RALINK_WDT
 
 config GXP_WATCHDOG
        tristate "HPE GXP watchdog support"
-       depends on ARCH_HPE_GXP
+       depends on ARCH_HPE_GXP || COMPILE_TEST
        select WATCHDOG_CORE
        help
          Say Y here to include support for the watchdog timer
index 3633f5b..7eab9de 100644 (file)
@@ -98,6 +98,7 @@ obj-$(CONFIG_VISCONTI_WATCHDOG) += visconti_wdt.o
 obj-$(CONFIG_MSC313E_WATCHDOG) += msc313e_wdt.o
 obj-$(CONFIG_APPLE_WATCHDOG) += apple_wdt.o
 obj-$(CONFIG_SUNPLUS_WATCHDOG) += sunplus_wdt.o
+obj-$(CONFIG_MARVELL_GTI_WDT) += marvell_gti_wdt.o
 
 # X86 (i386 + ia64 + x86_64) Architecture
 obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o
index e586529..8133a5d 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/types.h>
index d20ec27..558015f 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/atmel-st.h>
 #include <linux/miscdevice.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/platform_device.h>
@@ -26,8 +27,6 @@
 #include <linux/types.h>
 #include <linux/watchdog.h>
 #include <linux/uaccess.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 
 #define WDT_DEFAULT_TIME       5       /* seconds */
 #define WDT_MAX_TIME           256     /* seconds */
index 47250f9..901b94d 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/uaccess.h>
 
 #include <asm/irq.h>
index 442c5bf..28f5af7 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/watchdog.h>
@@ -221,20 +221,18 @@ static const struct dev_pm_ops ftwdt010_wdt_dev_pm_ops = {
                                ftwdt010_wdt_resume)
 };
 
-#ifdef CONFIG_OF
 static const struct of_device_id ftwdt010_wdt_match[] = {
        { .compatible = "faraday,ftwdt010" },
        { .compatible = "cortina,gemini-watchdog" },
        {},
 };
 MODULE_DEVICE_TABLE(of, ftwdt010_wdt_match);
-#endif
 
 static struct platform_driver ftwdt010_wdt_driver = {
        .probe          = ftwdt010_wdt_probe,
        .driver         = {
                .name   = "ftwdt010-wdt",
-               .of_match_table = of_match_ptr(ftwdt010_wdt_match),
+               .of_match_table = ftwdt010_wdt_match,
                .pm = &ftwdt010_wdt_dev_pm_ops,
        },
 };
index 97afc90..6a1db1c 100644 (file)
@@ -31,7 +31,7 @@
 #include <linux/fs.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
 
index 6fcc359..42e8ffa 100644 (file)
@@ -26,8 +26,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/watchdog.h>
@@ -375,7 +374,7 @@ static void imx2_wdt_shutdown(struct platform_device *pdev)
                 */
                imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
                imx2_wdt_ping(wdog);
-               dev_crit(&pdev->dev, "Device shutdown: Expect reboot!\n");
+               dev_crit(&pdev->dev, "Device shutdown.\n");
        }
 }
 
index 7ca4867..c703586 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/watchdog.h>
index 9b2173f..fb7fae7 100644 (file)
@@ -203,3 +203,4 @@ module_platform_driver(mid_wdt_driver);
 MODULE_AUTHOR("David Cohen <david.a.cohen@linux.intel.com>");
 MODULE_DESCRIPTION("Watchdog Driver for Intel MID platform");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:intel_mid_wdt");
index 6fab504..a273b97 100644 (file)
@@ -9,7 +9,8 @@
 #include <linux/module.h>
 #include <linux/bitops.h>
 #include <linux/watchdog.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/uaccess.h>
 #include <linux/clk.h>
 #include <linux/io.h>
index 4ac7810..0587ff4 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
diff --git a/drivers/watchdog/marvell_gti_wdt.c b/drivers/watchdog/marvell_gti_wdt.c
new file mode 100644 (file)
index 0000000..d7eb828
--- /dev/null
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell GTI Watchdog driver
+ *
+ * Copyright (C) 2023 Marvell.
+ */
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+/*
+ * Hardware supports following mode of operation:
+ * 1) Interrupt Only:
+ *    This will generate the interrupt to arm core whenever timeout happens.
+ *
+ * 2) Interrupt + del3t (Interrupt to firmware (SCP processor)).
+ *    This will generate interrupt to arm core on 1st timeout happens
+ *    This will generate interrupt to SCP processor on 2nd timeout happens
+ *
+ * 3) Interrupt + Interrupt to SCP processor (called del3t) + reboot.
+ *    This will generate interrupt to arm core on 1st timeout happens
+ *    Will generate interrupt to SCP processor on 2nd timeout happens,
+ *    if interrupt is configured.
+ *    Reboot on 3rd timeout.
+ *
+ * The driver uses the hardware in mode 3 above so that the system can reboot
+ * in case of a hardware hang. The h/w is also configured not to generate the
+ * SCP interrupt, so the 2nd timeout is effectively ignored within hardware.
+ *
+ * First timeout is effectively watchdog pretimeout.
+ */
+
+/* GTI CWD Watchdog (GTI_CWD_WDOG) Register */
+#define GTI_CWD_WDOG(reg_offset)       (0x8 * (reg_offset))
+#define GTI_CWD_WDOG_MODE_INT_DEL3T_RST        0x3
+#define GTI_CWD_WDOG_MODE_MASK         GENMASK_ULL(1, 0)
+#define GTI_CWD_WDOG_LEN_SHIFT         4
+#define GTI_CWD_WDOG_LEN_MASK          GENMASK_ULL(19, 4)
+#define GTI_CWD_WDOG_CNT_SHIFT         20
+#define GTI_CWD_WDOG_CNT_MASK          GENMASK_ULL(43, 20)
+
+/* GTI CWD Watchdog Interrupt (GTI_CWD_INT) Register */
+#define GTI_CWD_INT                    0x200
+#define GTI_CWD_INT_PENDING_STATUS(bit)        BIT_ULL(bit)
+
+/* GTI CWD Watchdog Interrupt Enable Clear (GTI_CWD_INT_ENA_CLR) Register */
+#define GTI_CWD_INT_ENA_CLR            0x210
+#define GTI_CWD_INT_ENA_CLR_VAL(bit)   BIT_ULL(bit)
+
+/* GTI CWD Watchdog Interrupt Enable Set (GTI_CWD_INT_ENA_SET) Register */
+#define GTI_CWD_INT_ENA_SET            0x218
+#define GTI_CWD_INT_ENA_SET_VAL(bit)   BIT_ULL(bit)
+
+/* GTI CWD Watchdog Poke (GTI_CWD_POKE) Registers */
+#define GTI_CWD_POKE(reg_offset)       (0x10000 + 0x8 * (reg_offset))
+#define GTI_CWD_POKE_VAL               1
+
+/* Per-SoC match data, selected through the OF compatible string. */
+struct gti_match_data {
+       /* Number of GTI timers; probe accepts timer indices below this value */
+       u32 gti_num_timers;
+};
+
+/* Match data referenced by the "marvell,cn9670-wdt" compatible entry */
+static const struct gti_match_data match_data_octeontx2 = {
+       .gti_num_timers = 54,
+};
+
+/* Match data referenced by the "marvell,cn10624-wdt" compatible entry */
+static const struct gti_match_data match_data_cn10k = {
+       .gti_num_timers = 64,
+};
+
+/* Driver state for one GTI watchdog instance. */
+struct gti_wdt_priv {
+       /* Watchdog core device; its drvdata points back to this structure */
+       struct watchdog_device wdev;
+       /* Mapped GTI register block (platform memory resource 0) */
+       void __iomem *base;
+       /* Rate of sclk as returned by clk_get_rate() */
+       u32 clock_freq;
+       /* GTI clock, obtained and enabled in gti_wdt_get_cntfrq() */
+       struct clk *sclk;
+       /* wdt_timer_idx used for timer to be used for system watchdog */
+       u32 wdt_timer_idx;
+       /* SoC match data looked up in probe */
+       const struct gti_match_data *data;
+};
+
+/*
+ * Interrupt handler for the first timeout: clear this timer's pending bit
+ * and report a pretimeout to the watchdog core.
+ */
+static irqreturn_t gti_wdt_interrupt(int irq, void *data)
+{
+       struct watchdog_device *wdd = data;
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdd);
+       void __iomem *int_reg = gti->base + GTI_CWD_INT;
+
+       /* Clear the pending status bit that belongs to our timer */
+       writeq(GTI_CWD_INT_PENDING_STATUS(gti->wdt_timer_idx), int_reg);
+       watchdog_notify_pretimeout(wdd);
+
+       return IRQ_HANDLED;
+}
+
+/* Keepalive: write the poke value to the poke register of our timer. */
+static int gti_wdt_ping(struct watchdog_device *wdev)
+{
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdev);
+       void __iomem *poke_reg = gti->base + GTI_CWD_POKE(gti->wdt_timer_idx);
+
+       writeq(GTI_CWD_POKE_VAL, poke_reg);
+
+       return 0;
+}
+
+/*
+ * Start the watchdog: clear and enable this timer's interrupt, then select
+ * the interrupt + DEL3T + reset mode. Fails with -EINVAL when no pretimeout
+ * is configured.
+ */
+static int gti_wdt_start(struct watchdog_device *wdev)
+{
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdev);
+       void __iomem *wdog_reg = gti->base + GTI_CWD_WDOG(gti->wdt_timer_idx);
+       u64 ctrl;
+
+       if (wdev->pretimeout == 0)
+               return -EINVAL;
+
+       set_bit(WDOG_HW_RUNNING, &wdev->status);
+
+       /* Drop any pending interrupt before enabling it */
+       writeq(GTI_CWD_INT_PENDING_STATUS(gti->wdt_timer_idx),
+              gti->base + GTI_CWD_INT);
+       writeq(GTI_CWD_INT_ENA_SET_VAL(gti->wdt_timer_idx),
+              gti->base + GTI_CWD_INT_ENA_SET);
+
+       /* Mode: interrupt + SCP interrupt (DEL3T) + core domain reset */
+       ctrl = readq(wdog_reg);
+       ctrl |= GTI_CWD_WDOG_MODE_INT_DEL3T_RST;
+       writeq(ctrl, wdog_reg);
+
+       return 0;
+}
+
+/* Stop the watchdog: mask this timer's interrupt and clear its mode field. */
+static int gti_wdt_stop(struct watchdog_device *wdev)
+{
+       struct gti_wdt_priv *gti = watchdog_get_drvdata(wdev);
+       void __iomem *wdog_reg = gti->base + GTI_CWD_WDOG(gti->wdt_timer_idx);
+       u64 ctrl;
+
+       /* Mask the interrupt first */
+       writeq(GTI_CWD_INT_ENA_CLR_VAL(gti->wdt_timer_idx),
+              gti->base + GTI_CWD_INT_ENA_CLR);
+
+       /* GTI_CWD_WDOG.Mode = 0 stops the timer */
+       ctrl = readq(wdog_reg);
+       ctrl &= ~GTI_CWD_WDOG_MODE_MASK;
+       writeq(ctrl, wdog_reg);
+
+       return 0;
+}
+
+/*
+ * Set the watchdog timeout to @timeout and derive the pretimeout from it.
+ *
+ * The pretimeout (timeout / 3) is converted to clock cycles, scaled to the
+ * register units and written to both the LEN and CNT fields of this timer's
+ * GTI_CWD_WDOG register; the mode bits are preserved. Always returns 0.
+ */
+static int gti_wdt_settimeout(struct watchdog_device *wdev,
+                                       unsigned int timeout)
+{
+       struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
+       u64 timeout_wdog, regval;
+
+       /* Update new timeout */
+       wdev->timeout = timeout;
+
+       /* Pretimeout is 1/3 of timeout */
+       wdev->pretimeout = timeout / 3;
+
+       /* Get clock cycles from pretimeout */
+       timeout_wdog = (u64)priv->clock_freq * wdev->pretimeout;
+
+       /* Watchdog counts in 1024 cycle steps */
+       timeout_wdog = timeout_wdog >> 10;
+
+       /*
+        * GTI_CWD_WDOG.LEN (bits 19:4) is 16-bit: round up to units of 256
+        * steps and clamp to the largest value the field can hold.
+        */
+       timeout_wdog = (timeout_wdog + 0xff) >> 8;
+       if (timeout_wdog >= 0x10000)
+               timeout_wdog = 0xffff;
+
+       /*
+        * GTI_CWD_WDOG.CNT (bits 43:20) is 24-bit: its lower 8 bits are left
+        * zero and its upper 16 bits get the same value as GTI_CWD_WDOG.LEN.
+        */
+       regval = readq(priv->base + GTI_CWD_WDOG(priv->wdt_timer_idx));
+       regval &= GTI_CWD_WDOG_MODE_MASK;
+       regval |= (timeout_wdog << (GTI_CWD_WDOG_CNT_SHIFT + 8)) |
+                  (timeout_wdog << GTI_CWD_WDOG_LEN_SHIFT);
+       writeq(regval, priv->base + GTI_CWD_WDOG(priv->wdt_timer_idx));
+
+       return 0;
+}
+
+/*
+ * Set the pretimeout (first hardware timeout) to @timeout.
+ *
+ * The driver keeps the pretimeout at 1/3 of the timeout, so this reprograms
+ * the overall timeout to three times @timeout. Returns -EINVAL when that
+ * would exceed max_timeout, otherwise the result of gti_wdt_settimeout().
+ */
+static int gti_wdt_set_pretimeout(struct watchdog_device *wdev,
+                                       unsigned int timeout)
+{
+       struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
+       struct watchdog_device *wdog_dev = &priv->wdev;
+
+       /*
+        * Compare by division: "timeout * 3" can wrap around for large
+        * values and would then wrongly pass the range check.
+        */
+       if (timeout > wdog_dev->max_timeout / 3)
+               return -EINVAL;
+
+       return gti_wdt_settimeout(wdev, timeout * 3);
+}
+
+/*
+ * Get and enable the GTI clock and cache its rate in priv->clock_freq.
+ *
+ * devm_clk_get_enabled() already arranges for the clock to be disabled and
+ * unprepared when the device is unbound, so no additional devm cleanup
+ * action is registered here; doing so would disable the clock twice.
+ *
+ * Returns 0 on success, the clock lookup error, or -EINVAL for a zero rate.
+ */
+static int gti_wdt_get_cntfrq(struct platform_device *pdev,
+                             struct gti_wdt_priv *priv)
+{
+       priv->sclk = devm_clk_get_enabled(&pdev->dev, NULL);
+       if (IS_ERR(priv->sclk))
+               return PTR_ERR(priv->sclk);
+
+       priv->clock_freq = clk_get_rate(priv->sclk);
+       if (!priv->clock_freq)
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Capabilities advertised to the watchdog core and userspace.
+ * NOTE(review): WDIOF_CARDRESET is advertised, but nothing in this file sets
+ * bootstatus - confirm whether the flag is intended.
+ */
+static const struct watchdog_info gti_wdt_ident = {
+       .identity = "Marvell GTI watchdog",
+       .options = WDIOF_SETTIMEOUT | WDIOF_PRETIMEOUT | WDIOF_KEEPALIVEPING |
+                  WDIOF_MAGICCLOSE | WDIOF_CARDRESET,
+};
+
+/* Watchdog core callbacks implemented by this driver. */
+static const struct watchdog_ops gti_wdt_ops = {
+       .owner = THIS_MODULE,
+       .start = gti_wdt_start,
+       .stop = gti_wdt_stop,
+       .ping = gti_wdt_ping,
+       /* Both setters go through gti_wdt_settimeout(): pretimeout = timeout / 3 */
+       .set_timeout = gti_wdt_settimeout,
+       .set_pretimeout = gti_wdt_set_pretimeout,
+};
+
+/*
+ * Probe one GTI watchdog: map the registers, enable the clock, pick the
+ * timer index (the last timer unless "marvell,wdt-timer-index" overrides
+ * it), program the default timeout, register the watchdog device and
+ * install the pretimeout interrupt handler.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int gti_wdt_probe(struct platform_device *pdev)
+{
+       struct gti_wdt_priv *priv;
+       struct device *dev = &pdev->dev;
+       struct watchdog_device *wdog_dev;
+       u64 max_pretimeout;
+       u32 wdt_idx;
+       int irq;
+       int err;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(priv->base))
+               return dev_err_probe(dev, PTR_ERR(priv->base),
+                                    "reg property not valid/found\n");
+
+       err = gti_wdt_get_cntfrq(pdev, priv);
+       if (err)
+               return dev_err_probe(dev, err,
+                                    "GTI clock frequency not valid/found\n");
+
+       priv->data = of_device_get_match_data(dev);
+       /* No match data, e.g. when not bound through the OF match table */
+       if (!priv->data)
+               return -ENODEV;
+
+       /* default use last timer for watchdog */
+       priv->wdt_timer_idx = priv->data->gti_num_timers - 1;
+
+       err = of_property_read_u32(dev->of_node, "marvell,wdt-timer-index",
+                                  &wdt_idx);
+       if (!err) {
+               /*
+                * err is 0 in this branch, so it must not be used as the
+                * error code: probe would then report success.
+                */
+               if (wdt_idx >= priv->data->gti_num_timers)
+                       return dev_err_probe(dev, -EINVAL,
+                                            "GTI wdog timer index not valid\n");
+
+               priv->wdt_timer_idx = wdt_idx;
+       }
+
+       wdog_dev = &priv->wdev;
+       wdog_dev->info = &gti_wdt_ident;
+       wdog_dev->ops = &gti_wdt_ops;
+       wdog_dev->parent = dev;
+       /*
+        * Watchdog counter is 24 bit where lower 8 bits are zeros
+        * This counter decrements every 1024 clock cycles.
+        */
+       max_pretimeout = (GTI_CWD_WDOG_CNT_MASK >> GTI_CWD_WDOG_CNT_SHIFT);
+       max_pretimeout &= ~0xFFUL;
+       max_pretimeout = (max_pretimeout * 1024) / priv->clock_freq;
+       wdog_dev->pretimeout = max_pretimeout;
+
+       /* Maximum timeout is 3 times the pretimeout */
+       wdog_dev->max_timeout = max_pretimeout * 3;
+       /* Minimum first timeout (pretimeout) is 1, so min_timeout as 3 */
+       wdog_dev->min_timeout = 3;
+       wdog_dev->timeout = wdog_dev->pretimeout;
+
+       watchdog_set_drvdata(wdog_dev, priv);
+       platform_set_drvdata(pdev, priv);
+       /* Programs the hardware and sets pretimeout to timeout / 3 */
+       gti_wdt_settimeout(wdog_dev, wdog_dev->timeout);
+       watchdog_stop_on_reboot(wdog_dev);
+       watchdog_stop_on_unregister(wdog_dev);
+
+       err = devm_watchdog_register_device(dev, wdog_dev);
+       if (err)
+               return err;
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0)
+               return dev_err_probe(dev, irq, "IRQ resource not found\n");
+
+       err = devm_request_irq(dev, irq, gti_wdt_interrupt, 0,
+                              pdev->name, &priv->wdev);
+       if (err)
+               return dev_err_probe(dev, err, "Failed to register interrupt handler\n");
+
+       dev_info(dev, "Watchdog enabled (timeout=%u sec)\n", wdog_dev->timeout);
+       return 0;
+}
+
+/* Supported compatibles; .data supplies the per-SoC timer count used in probe */
+static const struct of_device_id gti_wdt_of_match[] = {
+       { .compatible = "marvell,cn9670-wdt", .data = &match_data_octeontx2},
+       { .compatible = "marvell,cn10624-wdt", .data = &match_data_cn10k},
+       { },
+};
+MODULE_DEVICE_TABLE(of, gti_wdt_of_match);
+
+static struct platform_driver gti_wdt_driver = {
+       .driver = {
+               .name = "gti-wdt",
+               .of_match_table = gti_wdt_of_match,
+       },
+       .probe = gti_wdt_probe,
+};
+module_platform_driver(gti_wdt_driver);
+
+MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_DESCRIPTION("Marvell GTI watchdog driver");
+MODULE_LICENSE("GPL");
index 3c98030..c7de302 100644 (file)
@@ -153,7 +153,6 @@ MODULE_DEVICE_TABLE(mcb, men_z069_ids);
 static struct mcb_driver men_z069_driver = {
        .driver = {
                .name = "z069-wdt",
-               .owner = THIS_MODULE,
        },
        .probe = men_z069_probe,
        .remove = men_z069_remove,
index 35d80cb..a48622d 100644 (file)
@@ -22,7 +22,6 @@
 
 #define GXBB_WDT_CTRL_CLKDIV_EN                        BIT(25)
 #define GXBB_WDT_CTRL_CLK_EN                   BIT(24)
-#define GXBB_WDT_CTRL_EE_RESET                 BIT(21)
 #define GXBB_WDT_CTRL_EN                       BIT(18)
 #define GXBB_WDT_CTRL_DIV_MASK                 (BIT(18) - 1)
 
@@ -45,6 +44,10 @@ struct meson_gxbb_wdt {
        struct clk *clk;
 };
 
+struct wdt_params {
+       u32 rst;
+};
+
 static int meson_gxbb_wdt_start(struct watchdog_device *wdt_dev)
 {
        struct meson_gxbb_wdt *data = watchdog_get_drvdata(wdt_dev);
@@ -140,8 +143,17 @@ static const struct dev_pm_ops meson_gxbb_wdt_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(meson_gxbb_wdt_suspend, meson_gxbb_wdt_resume)
 };
 
+static const struct wdt_params gxbb_params = {
+       .rst = BIT(21),
+};
+
+static const struct wdt_params t7_params = {
+       .rst = BIT(22),
+};
+
 static const struct of_device_id meson_gxbb_wdt_dt_ids[] = {
-        { .compatible = "amlogic,meson-gxbb-wdt", },
+        { .compatible = "amlogic,meson-gxbb-wdt", .data = &gxbb_params, },
+        { .compatible = "amlogic,t7-wdt", .data = &t7_params, },
         { /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, meson_gxbb_wdt_dt_ids);
@@ -150,6 +162,7 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct meson_gxbb_wdt *data;
+       struct wdt_params *params;
        u32 ctrl_reg;
 
        data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
@@ -164,6 +177,8 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
        if (IS_ERR(data->clk))
                return PTR_ERR(data->clk);
 
+       params = (struct wdt_params *)of_device_get_match_data(dev);
+
        platform_set_drvdata(pdev, data);
 
        data->wdt_dev.parent = dev;
@@ -191,7 +206,7 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
        /* Setup with 1ms timebase */
        ctrl_reg |= ((clk_get_rate(data->clk) / 1000) &
                        GXBB_WDT_CTRL_DIV_MASK) |
-                       GXBB_WDT_CTRL_EE_RESET |
+                       params->rst |
                        GXBB_WDT_CTRL_CLK_EN |
                        GXBB_WDT_CTRL_CLKDIV_EN;
 
index 539feaa..497496f 100644 (file)
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
 
index 1c569be..867f9f3 100644 (file)
@@ -16,8 +16,8 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/watchdog.h>
 #include <linux/io.h>
index a9c4375..b2330b1 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/types.h>
index 2a079ca..05657dc 100644 (file)
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/ioport.h>
 #include <linux/watchdog.h>
 #include <linux/io.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
 
 /* Register offsets for the Wdt device */
 #define XWT_TWCSR0_OFFSET   0x0 /* Control/Status Register0 */
index bc4ccdd..ab06824 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/watchdog.h>
index 6d1a002..1d282de 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/watchdog.h>
index a98abd0..782b8c2 100644 (file)
@@ -23,8 +23,8 @@
 #include <linux/bitops.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 
 #define DRV_NAME "PIKA-WDT"
 
index f4bfbff..f3fcbeb 100644 (file)
@@ -266,7 +266,7 @@ static struct platform_driver pm8916_wdt_driver = {
        .probe = pm8916_wdt_probe,
        .driver = {
                .name = "pm8916-wdt",
-               .of_match_table = of_match_ptr(pm8916_wdt_id_table),
+               .of_match_table = pm8916_wdt_id_table,
                .pm = &pm8916_wdt_pm_ops,
        },
 };
index d776474..9e790f0 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
-#include <linux/of_device.h>
 
 enum wdt_reg {
        WDT_RST,
index 2c95615..5d1c217 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/mfd/rave-sp.h>
 #include <linux/module.h>
 #include <linux/nvmem-consumer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
 #include <linux/slab.h>
index c04b383..b293792 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/miscdevice.h>
 #include <linux/watchdog.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
index ce8f18e..8e1be7b 100644 (file)
@@ -14,6 +14,8 @@
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/types.h>
 
 #define DWDST                  BIT(1)
 
+#define PON_REASON_SOF_NUM     0xBBBBCCCC
+#define PON_REASON_MAGIC_NUM   0xDDDDDDDD
+#define PON_REASON_EOF_NUM     0xCCCCBBBB
+#define RESERVED_MEM_MIN_SIZE  12
+
 static int heartbeat = DEFAULT_HEARTBEAT;
 
 /*
@@ -198,6 +205,11 @@ static int rti_wdt_probe(struct platform_device *pdev)
        struct rti_wdt_device *wdt;
        struct clk *clk;
        u32 last_ping = 0;
+       struct device_node *node;
+       u32 reserved_mem_size;
+       struct resource res;
+       u32 *vaddr;
+       u64 paddr;
 
        wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
        if (!wdt)
@@ -284,6 +296,42 @@ static int rti_wdt_probe(struct platform_device *pdev)
                }
        }
 
+       node = of_parse_phandle(pdev->dev.of_node, "memory-region", 0);
+       if (node) {
+               ret = of_address_to_resource(node, 0, &res);
+               if (ret) {
+                       dev_err(dev, "No memory address assigned to the region.\n");
+                       goto err_iomap;
+               }
+
+               /*
+                * If reserved memory is defined for watchdog reset cause.
+                * Readout the Power-on(PON) reason and pass to bootstatus.
+                */
+               paddr = res.start;
+               reserved_mem_size = resource_size(&res);
+               if (reserved_mem_size < RESERVED_MEM_MIN_SIZE) {
+                       dev_err(dev, "The size of reserved memory is too small.\n");
+                       ret = -EINVAL;
+                       goto err_iomap;
+               }
+
+               vaddr = memremap(paddr, reserved_mem_size, MEMREMAP_WB);
+               if (!vaddr) {
+                       dev_err(dev, "Failed to map memory-region.\n");
+                       ret = -ENOMEM;
+                       goto err_iomap;
+               }
+
+               if (vaddr[0] == PON_REASON_SOF_NUM &&
+                   vaddr[1] == PON_REASON_MAGIC_NUM &&
+                   vaddr[2] == PON_REASON_EOF_NUM) {
+                       wdd->bootstatus |= WDIOF_CARDRESET;
+               }
+               memset(vaddr, 0, reserved_mem_size);
+               memunmap(vaddr);
+       }
+
        watchdog_init_timeout(wdd, heartbeat, dev);
 
        ret = watchdog_register_device(wdd);
index fe6c2ed..cb4901b 100644 (file)
@@ -9,9 +9,9 @@
 #include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
index d404953..1741f98 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
index 95416a9..0b4bd88 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/delay.h>
@@ -379,10 +378,11 @@ static int s3c2410wdt_enable(struct s3c2410_wdt *wdt, bool en)
 static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
 {
        struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+       unsigned long flags;
 
-       spin_lock(&wdt->lock);
+       spin_lock_irqsave(&wdt->lock, flags);
        writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
-       spin_unlock(&wdt->lock);
+       spin_unlock_irqrestore(&wdt->lock, flags);
 
        return 0;
 }
@@ -399,10 +399,11 @@ static void __s3c2410wdt_stop(struct s3c2410_wdt *wdt)
 static int s3c2410wdt_stop(struct watchdog_device *wdd)
 {
        struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+       unsigned long flags;
 
-       spin_lock(&wdt->lock);
+       spin_lock_irqsave(&wdt->lock, flags);
        __s3c2410wdt_stop(wdt);
-       spin_unlock(&wdt->lock);
+       spin_unlock_irqrestore(&wdt->lock, flags);
 
        return 0;
 }
@@ -411,8 +412,9 @@ static int s3c2410wdt_start(struct watchdog_device *wdd)
 {
        unsigned long wtcon;
        struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+       unsigned long flags;
 
-       spin_lock(&wdt->lock);
+       spin_lock_irqsave(&wdt->lock, flags);
 
        __s3c2410wdt_stop(wdt);
 
@@ -433,7 +435,7 @@ static int s3c2410wdt_start(struct watchdog_device *wdd)
        writel(wdt->count, wdt->reg_base + S3C2410_WTDAT);
        writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
        writel(wtcon, wdt->reg_base + S3C2410_WTCON);
-       spin_unlock(&wdt->lock);
+       spin_unlock_irqrestore(&wdt->lock, flags);
 
        return 0;
 }
index aeee934..13e7291 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
@@ -255,6 +254,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
        struct sama5d4_wdt *wdt;
        void __iomem *regs;
        u32 irq = 0;
+       u32 reg;
        int ret;
 
        wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
@@ -305,6 +305,12 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 
        watchdog_init_timeout(wdd, wdt_timeout, dev);
 
+       reg = wdt_read(wdt, AT91_WDT_MR);
+       if (!(reg & AT91_WDT_WDDIS)) {
+               wdt->mr &= ~AT91_WDT_WDDIS;
+               set_bit(WDOG_HW_RUNNING, &wdd->status);
+       }
+
        ret = sama5d4_wdt_init(wdt);
        if (ret)
                return ret;
index fd3cfdd..421ebcd 100644 (file)
 #include <linux/io.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/uaccess.h>
 #include <linux/watchdog.h>
index 8058fca..5f501b4 100644 (file)
@@ -8,7 +8,8 @@
 #include <linux/clk.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/watchdog.h>
@@ -526,7 +527,6 @@ static void starfive_wdt_shutdown(struct platform_device *pdev)
        starfive_wdt_pm_stop(&wdt->wdd);
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int starfive_wdt_suspend(struct device *dev)
 {
        struct starfive_wdt *wdt = dev_get_drvdata(dev);
@@ -556,9 +556,7 @@ static int starfive_wdt_resume(struct device *dev)
 
        return starfive_wdt_start(wdt);
 }
-#endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM
 static int starfive_wdt_runtime_suspend(struct device *dev)
 {
        struct starfive_wdt *wdt = dev_get_drvdata(dev);
@@ -574,11 +572,10 @@ static int starfive_wdt_runtime_resume(struct device *dev)
 
        return starfive_wdt_enable_clock(wdt);
 }
-#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops starfive_wdt_pm_ops = {
-       SET_RUNTIME_PM_OPS(starfive_wdt_runtime_suspend, starfive_wdt_runtime_resume, NULL)
-       SET_SYSTEM_SLEEP_PM_OPS(starfive_wdt_suspend, starfive_wdt_resume)
+       RUNTIME_PM_OPS(starfive_wdt_runtime_suspend, starfive_wdt_runtime_resume, NULL)
+       SYSTEM_SLEEP_PM_OPS(starfive_wdt_suspend, starfive_wdt_resume)
 };
 
 static const struct of_device_id starfive_wdt_match[] = {
@@ -594,7 +591,7 @@ static struct platform_driver starfive_wdt_driver = {
        .shutdown = starfive_wdt_shutdown,
        .driver = {
                .name = "starfive-wdt",
-               .pm = &starfive_wdt_pm_ops,
+               .pm = pm_ptr(&starfive_wdt_pm_ops),
                .of_match_table = starfive_wdt_match,
        },
 };
index 570a715..d9fd50d 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
@@ -288,7 +287,7 @@ static struct platform_driver stm32_iwdg_driver = {
        .probe          = stm32_iwdg_probe,
        .driver = {
                .name   = "iwdg",
-               .of_match_table = of_match_ptr(stm32_iwdg_of_match),
+               .of_match_table = stm32_iwdg_of_match,
        },
 };
 module_platform_driver(stm32_iwdg_driver);
index 6cf8292..b85354a 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
index d4c5a73..5b55cca 100644 (file)
@@ -161,7 +161,7 @@ static int watchdog_reboot_notifier(struct notifier_block *nb,
        struct watchdog_device *wdd;
 
        wdd = container_of(nb, struct watchdog_device, reboot_nb);
-       if (code == SYS_DOWN || code == SYS_HALT) {
+       if (code == SYS_DOWN || code == SYS_HALT || code == SYS_POWER_OFF) {
                if (watchdog_hw_running(wdd)) {
                        int ret;
 
index 2585038..d271e2e 100644 (file)
@@ -9,9 +9,10 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
+#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
+#include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
 /* Max timeout is calculated at 100MHz source clock */
@@ -71,7 +72,7 @@ static int xilinx_wwdt_start(struct watchdog_device *wdd)
 
        /* Calculate timeout count */
        time_out = xdev->freq * wdd->timeout;
-       closed_timeout = (time_out * xdev->close_percent) / 100;
+       closed_timeout = div_u64(time_out * xdev->close_percent, 100);
        open_timeout = time_out - closed_timeout;
        wdd->min_hw_heartbeat_ms = xdev->close_percent * 10 * wdd->timeout;
 
index 50c635d..1f77ca0 100644 (file)
@@ -12,3 +12,4 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
 
 ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
 ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o
+ceph-$(CONFIG_FS_ENCRYPTION) += crypto.o
index c91b293..c53a1d2 100644 (file)
@@ -140,7 +140,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
                newattrs.ia_ctime = current_time(inode);
                newattrs.ia_mode = new_mode;
                newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-               ret = __ceph_setattr(inode, &newattrs);
+               ret = __ceph_setattr(inode, &newattrs, NULL);
                if (ret)
                        goto out_free;
        }
@@ -151,7 +151,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
                        newattrs.ia_ctime = old_ctime;
                        newattrs.ia_mode = old_mode;
                        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-                       __ceph_setattr(inode, &newattrs);
+                       __ceph_setattr(inode, &newattrs, NULL);
                }
                goto out_free;
        }
index 59cbfb8..f486307 100644 (file)
@@ -18,6 +18,7 @@
 #include "mds_client.h"
 #include "cache.h"
 #include "metric.h"
+#include "crypto.h"
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/striper.h>
 
@@ -242,11 +243,13 @@ static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
 
 static void finish_netfs_read(struct ceph_osd_request *req)
 {
-       struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
+       struct inode *inode = req->r_inode;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
        struct netfs_io_subrequest *subreq = req->r_priv;
-       int num_pages;
+       struct ceph_osd_req_op *op = &req->r_ops[0];
        int err = req->r_result;
+       bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ);
 
        ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency,
                                 req->r_end_latency, osd_data->length, err);
@@ -260,14 +263,29 @@ static void finish_netfs_read(struct ceph_osd_request *req)
        else if (err == -EBLOCKLISTED)
                fsc->blocklisted = true;
 
-       if (err >= 0 && err < subreq->len)
-               __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+       if (err >= 0) {
+               if (sparse && err > 0)
+                       err = ceph_sparse_ext_map_end(op);
+               if (err < subreq->len)
+                       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+               if (IS_ENCRYPTED(inode) && err > 0) {
+                       err = ceph_fscrypt_decrypt_extents(inode,
+                                       osd_data->pages, subreq->start,
+                                       op->extent.sparse_ext,
+                                       op->extent.sparse_ext_cnt);
+                       if (err > subreq->len)
+                               err = subreq->len;
+               }
+       }
 
+       if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+               ceph_put_page_vector(osd_data->pages,
+                                    calc_pages_for(osd_data->alignment,
+                                       osd_data->length), false);
+       }
        netfs_subreq_terminated(subreq, err, false);
-
-       num_pages = calc_pages_for(osd_data->alignment, osd_data->length);
-       ceph_put_page_vector(osd_data->pages, num_pages, false);
        iput(req->r_inode);
+       ceph_dec_osd_stopping_blocker(fsc->mdsc);
 }
 
 static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
@@ -334,10 +352,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        struct ceph_osd_request *req = NULL;
        struct ceph_vino vino = ceph_vino(inode);
        struct iov_iter iter;
-       struct page **pages;
-       size_t page_off;
        int err = 0;
        u64 len = subreq->len;
+       bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
+       u64 off = subreq->start;
 
        if (ceph_inode_is_shutdown(inode)) {
                err = -EIO;
@@ -347,8 +365,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
        if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
                return;
 
-       req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
-                       0, 1, CEPH_OSD_OP_READ,
+       ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
+
+       req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
+                       off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
                        CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
                        NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
        if (IS_ERR(req)) {
@@ -357,20 +377,48 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
                goto out;
        }
 
+       if (sparse) {
+               err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
+               if (err)
+                       goto out;
+       }
+
        dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
+
        iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
-       err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
-       if (err < 0) {
-               dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
-               goto out;
-       }
 
-       /* should always give us a page-aligned read */
-       WARN_ON_ONCE(page_off);
-       len = err;
-       err = 0;
+       /*
+        * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
+        * encrypted inodes. We'd need infrastructure that handles an iov_iter
+        * instead of page arrays, and we don't have that as of yet. Once the
+        * dust settles on the write helpers and encrypt/decrypt routines for
+        * netfs, we should be able to rework this.
+        */
+       if (IS_ENCRYPTED(inode)) {
+               struct page **pages;
+               size_t page_off;
+
+               err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
+               if (err < 0) {
+                       dout("%s: iov_ter_get_pages_alloc returned %d\n",
+                            __func__, err);
+                       goto out;
+               }
+
+               /* should always give us a page-aligned read */
+               WARN_ON_ONCE(page_off);
+               len = err;
+               err = 0;
 
-       osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
+               osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
+                                                false);
+       } else {
+               osd_req_op_extent_osd_iter(req, 0, &iter);
+       }
+       if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
+               err = -EIO;
+               goto out;
+       }
        req->r_callback = finish_netfs_read;
        req->r_priv = subreq;
        req->r_inode = inode;
@@ -571,10 +619,12 @@ static u64 get_writepages_data_length(struct inode *inode,
                                      struct page *page, u64 start)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_snap_context *snapc = page_snap_context(page);
+       struct ceph_snap_context *snapc;
        struct ceph_cap_snap *capsnap = NULL;
        u64 end = i_size_read(inode);
+       u64 ret;
 
+       snapc = page_snap_context(ceph_fscrypt_pagecache_page(page));
        if (snapc != ci->i_head_snapc) {
                bool found = false;
                spin_lock(&ci->i_ceph_lock);
@@ -589,9 +639,12 @@ static u64 get_writepages_data_length(struct inode *inode,
                spin_unlock(&ci->i_ceph_lock);
                WARN_ON(!found);
        }
-       if (end > page_offset(page) + thp_size(page))
-               end = page_offset(page) + thp_size(page);
-       return end > start ? end - start : 0;
+       if (end > ceph_fscrypt_page_offset(page) + thp_size(page))
+               end = ceph_fscrypt_page_offset(page) + thp_size(page);
+       ret = end > start ? end - start : 0;
+       if (ret && fscrypt_is_bounce_page(page))
+               ret = round_up(ret, CEPH_FSCRYPT_BLOCK_SIZE);
+       return ret;
 }
 
 /*
@@ -610,10 +663,12 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        loff_t page_off = page_offset(page);
        int err;
        loff_t len = thp_size(page);
+       loff_t wlen;
        struct ceph_writeback_ctl ceph_wbc;
        struct ceph_osd_client *osdc = &fsc->client->osdc;
        struct ceph_osd_request *req;
        bool caching = ceph_is_cache_enabled(inode);
+       struct page *bounce_page = NULL;
 
        dout("writepage %p idx %lu\n", page, page->index);
 
@@ -649,31 +704,51 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        if (ceph_wbc.i_size < page_off + len)
                len = ceph_wbc.i_size - page_off;
 
+       wlen = IS_ENCRYPTED(inode) ? round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len;
        dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n",
-            inode, page, page->index, page_off, len, snapc, snapc->seq);
+            inode, page, page->index, page_off, wlen, snapc, snapc->seq);
 
        if (atomic_long_inc_return(&fsc->writeback_count) >
            CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
                fsc->write_congested = true;
 
-       req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
-                                   CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
-                                   ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
-                                   true);
+       req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode),
+                                   page_off, &wlen, 0, 1, CEPH_OSD_OP_WRITE,
+                                   CEPH_OSD_FLAG_WRITE, snapc,
+                                   ceph_wbc.truncate_seq,
+                                   ceph_wbc.truncate_size, true);
        if (IS_ERR(req)) {
                redirty_page_for_writepage(wbc, page);
                return PTR_ERR(req);
        }
 
+       if (wlen < len)
+               len = wlen;
+
        set_page_writeback(page);
        if (caching)
                ceph_set_page_fscache(page);
        ceph_fscache_write_to_cache(inode, page_off, len, caching);
 
+       if (IS_ENCRYPTED(inode)) {
+               bounce_page = fscrypt_encrypt_pagecache_blocks(page,
+                                                   CEPH_FSCRYPT_BLOCK_SIZE, 0,
+                                                   GFP_NOFS);
+               if (IS_ERR(bounce_page)) {
+                       redirty_page_for_writepage(wbc, page);
+                       end_page_writeback(page);
+                       ceph_osdc_put_request(req);
+                       return PTR_ERR(bounce_page);
+               }
+       }
+
        /* it may be a short write due to an object boundary */
        WARN_ON_ONCE(len > thp_size(page));
-       osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
-       dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
+       osd_req_op_extent_osd_data_pages(req, 0,
+                       bounce_page ? &bounce_page : &page, wlen, 0,
+                       false, false);
+       dout("writepage %llu~%llu (%llu bytes, %sencrypted)\n",
+            page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not ");
 
        req->r_mtime = inode->i_mtime;
        ceph_osdc_start_request(osdc, req);
@@ -681,7 +756,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 
        ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
                                  req->r_end_latency, len, err);
-
+       fscrypt_free_bounce_page(bounce_page);
        ceph_osdc_put_request(req);
        if (err == 0)
                err = len;
@@ -800,6 +875,11 @@ static void writepages_finish(struct ceph_osd_request *req)
                total_pages += num_pages;
                for (j = 0; j < num_pages; j++) {
                        page = osd_data->pages[j];
+                       if (fscrypt_is_bounce_page(page)) {
+                               page = fscrypt_pagecache_page(page);
+                               fscrypt_free_bounce_page(osd_data->pages[j]);
+                               osd_data->pages[j] = page;
+                       }
                        BUG_ON(!page);
                        WARN_ON(!PageUptodate(page));
 
@@ -835,6 +915,7 @@ static void writepages_finish(struct ceph_osd_request *req)
        else
                kfree(osd_data->pages);
        ceph_osdc_put_request(req);
+       ceph_dec_osd_stopping_blocker(fsc->mdsc);
 }
 
 /*
@@ -1070,9 +1151,28 @@ get_more_pages:
                                    fsc->mount_options->congestion_kb))
                                fsc->write_congested = true;
 
-                       pages[locked_pages++] = page;
-                       fbatch.folios[i] = NULL;
+                       if (IS_ENCRYPTED(inode)) {
+                               pages[locked_pages] =
+                                       fscrypt_encrypt_pagecache_blocks(page,
+                                               PAGE_SIZE, 0,
+                                               locked_pages ? GFP_NOWAIT : GFP_NOFS);
+                               if (IS_ERR(pages[locked_pages])) {
+                                       if (PTR_ERR(pages[locked_pages]) == -EINVAL)
+                                               pr_err("%s: inode->i_blkbits=%hhu\n",
+                                                       __func__, inode->i_blkbits);
+                                       /* better not fail on first page! */
+                                       BUG_ON(locked_pages == 0);
+                                       pages[locked_pages] = NULL;
+                                       redirty_page_for_writepage(wbc, page);
+                                       unlock_page(page);
+                                       break;
+                               }
+                               ++locked_pages;
+                       } else {
+                               pages[locked_pages++] = page;
+                       }
 
+                       fbatch.folios[i] = NULL;
                        len += thp_size(page);
                }
 
@@ -1100,7 +1200,7 @@ get_more_pages:
                }
 
 new_request:
-               offset = page_offset(pages[0]);
+               offset = ceph_fscrypt_page_offset(pages[0]);
                len = wsize;
 
                req = ceph_osdc_new_request(&fsc->client->osdc,
@@ -1121,9 +1221,13 @@ new_request:
                                                ceph_wbc.truncate_size, true);
                        BUG_ON(IS_ERR(req));
                }
-               BUG_ON(len < page_offset(pages[locked_pages - 1]) +
-                            thp_size(page) - offset);
+               BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) +
+                            thp_size(pages[locked_pages - 1]) - offset);
 
+               if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
+                       rc = -EIO;
+                       goto release_folios;
+               }
                req->r_callback = writepages_finish;
                req->r_inode = inode;
 
@@ -1132,7 +1236,9 @@ new_request:
                data_pages = pages;
                op_idx = 0;
                for (i = 0; i < locked_pages; i++) {
-                       u64 cur_offset = page_offset(pages[i]);
+                       struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
+
+                       u64 cur_offset = page_offset(page);
                        /*
                         * Discontinuity in page range? Ceph can handle that by just passing
                         * multiple extents in the write op.
@@ -1161,9 +1267,9 @@ new_request:
                                op_idx++;
                        }
 
-                       set_page_writeback(pages[i]);
+                       set_page_writeback(page);
                        if (caching)
-                               ceph_set_page_fscache(pages[i]);
+                               ceph_set_page_fscache(page);
                        len += thp_size(page);
                }
                ceph_fscache_write_to_cache(inode, offset, len, caching);
@@ -1179,8 +1285,16 @@ new_request:
                                                         offset);
                        len = max(len, min_len);
                }
+               if (IS_ENCRYPTED(inode))
+                       len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE);
+
                dout("writepages got pages at %llu~%llu\n", offset, len);
 
+               if (IS_ENCRYPTED(inode) &&
+                   ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK))
+                       pr_warn("%s: bad encrypted write offset=%lld len=%llu\n",
+                               __func__, offset, len);
+
                osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
                                                 0, from_pool, false);
                osd_req_op_extent_update(req, op_idx, len);
index 09cd6d3..14215ec 100644 (file)
@@ -14,6 +14,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "crypto.h"
 #include <linux/ceph/decode.h>
 #include <linux/ceph/messenger.h>
 
@@ -1216,15 +1217,11 @@ struct cap_msg_args {
        umode_t                 mode;
        bool                    inline_data;
        bool                    wake;
+       bool                    encrypted;
+       u32                     fscrypt_auth_len;
+       u8                      fscrypt_auth[sizeof(struct ceph_fscrypt_auth)]; // for context
 };
 
-/*
- * cap struct size + flock buffer size + inline version + inline data size +
- * osd_epoch_barrier + oldest_flush_tid
- */
-#define CAP_MSG_SIZE (sizeof(struct ceph_mds_caps) + \
-                     4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4)
-
 /* Marshal up the cap msg to the MDS */
 static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
 {
@@ -1240,7 +1237,7 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
             arg->size, arg->max_size, arg->xattr_version,
             arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0);
 
-       msg->hdr.version = cpu_to_le16(10);
+       msg->hdr.version = cpu_to_le16(12);
        msg->hdr.tid = cpu_to_le64(arg->flush_tid);
 
        fc = msg->front.iov_base;
@@ -1257,7 +1254,13 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
        fc->ino = cpu_to_le64(arg->ino);
        fc->snap_follows = cpu_to_le64(arg->follows);
 
-       fc->size = cpu_to_le64(arg->size);
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       if (arg->encrypted)
+               fc->size = cpu_to_le64(round_up(arg->size,
+                                               CEPH_FSCRYPT_BLOCK_SIZE));
+       else
+#endif
+               fc->size = cpu_to_le64(arg->size);
        fc->max_size = cpu_to_le64(arg->max_size);
        ceph_encode_timespec64(&fc->mtime, &arg->mtime);
        ceph_encode_timespec64(&fc->atime, &arg->atime);
@@ -1311,6 +1314,27 @@ static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
 
        /* Advisory flags (version 10) */
        ceph_encode_32(&p, arg->flags);
+
+       /* dirstats (version 11) - these are r/o on the client */
+       ceph_encode_64(&p, 0);
+       ceph_encode_64(&p, 0);
+
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       /*
+        * fscrypt_auth and fscrypt_file (version 12)
+        *
+        * fscrypt_auth holds the crypto context (if any). fscrypt_file
+        * tracks the real i_size as an __le64 field (and we use a rounded-up
+        * i_size in the traditional size field).
+        */
+       ceph_encode_32(&p, arg->fscrypt_auth_len);
+       ceph_encode_copy(&p, arg->fscrypt_auth, arg->fscrypt_auth_len);
+       ceph_encode_32(&p, sizeof(__le64));
+       ceph_encode_64(&p, arg->size);
+#else /* CONFIG_FS_ENCRYPTION */
+       ceph_encode_32(&p, 0);
+       ceph_encode_32(&p, 0);
+#endif /* CONFIG_FS_ENCRYPTION */
 }
 
 /*
@@ -1378,7 +1402,6 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
        arg->follows = flushing ? ci->i_head_snapc->seq : 0;
        arg->flush_tid = flush_tid;
        arg->oldest_flush_tid = oldest_flush_tid;
-
        arg->size = i_size_read(inode);
        ci->i_reported_size = arg->size;
        arg->max_size = ci->i_wanted_max_size;
@@ -1432,8 +1455,39 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
                }
        }
        arg->flags = flags;
+       arg->encrypted = IS_ENCRYPTED(inode);
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       if (ci->fscrypt_auth_len &&
+           WARN_ON_ONCE(ci->fscrypt_auth_len > sizeof(struct ceph_fscrypt_auth))) {
+               /* Don't set this if it's too big */
+               arg->fscrypt_auth_len = 0;
+       } else {
+               arg->fscrypt_auth_len = ci->fscrypt_auth_len;
+               memcpy(arg->fscrypt_auth, ci->fscrypt_auth,
+                      min_t(size_t, ci->fscrypt_auth_len,
+                            sizeof(arg->fscrypt_auth)));
+       }
+#endif /* CONFIG_FS_ENCRYPTION */
 }
 
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
+                     4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4 + 8)
+
+static inline int cap_msg_size(struct cap_msg_args *arg)
+{
+       return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len;
+}
+#else
+#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
+                     4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4)
+
+static inline int cap_msg_size(struct cap_msg_args *arg)
+{
+       return CAP_MSG_FIXED_FIELDS;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
 /*
  * Send a cap msg on the given inode.
  *
@@ -1444,7 +1498,8 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
        struct ceph_msg *msg;
        struct inode *inode = &ci->netfs.inode;
 
-       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
+       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS,
+                          false);
        if (!msg) {
                pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n",
                       ceph_vinop(inode), ceph_cap_string(arg->dirty),
@@ -1470,10 +1525,6 @@ static inline int __send_flush_snap(struct inode *inode,
        struct cap_msg_args     arg;
        struct ceph_msg         *msg;
 
-       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
-       if (!msg)
-               return -ENOMEM;
-
        arg.session = session;
        arg.ino = ceph_vino(inode).ino;
        arg.cid = 0;
@@ -1510,6 +1561,15 @@ static inline int __send_flush_snap(struct inode *inode,
        arg.inline_data = capsnap->inline_data;
        arg.flags = 0;
        arg.wake = false;
+       arg.encrypted = IS_ENCRYPTED(inode);
+
+       /* No fscrypt_auth changes from a capsnap.*/
+       arg.fscrypt_auth_len = 0;
+
+       msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(&arg),
+                          GFP_NOFS, false);
+       if (!msg)
+               return -ENOMEM;
 
        encode_cap_msg(msg, &arg);
        ceph_con_send(&arg.session->s_con, msg);
@@ -2900,10 +2960,9 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
  * due to a small max_size, make sure we check_max_size (and possibly
  * ask the mds) so we don't get hung up indefinitely.
  */
-int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got)
+int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
+                   int want, loff_t endoff, int *got)
 {
-       struct ceph_file_info *fi = filp->private_data;
-       struct inode *inode = file_inode(filp);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        int ret, _got, flags;
@@ -2912,7 +2971,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
        if (ret < 0)
                return ret;
 
-       if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+       if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
            fi->filp_gen != READ_ONCE(fsc->filp_gen))
                return -EBADF;
 
@@ -2965,7 +3024,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
                                continue;
                }
 
-               if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+               if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
                    fi->filp_gen != READ_ONCE(fsc->filp_gen)) {
                        if (ret >= 0 && _got)
                                ceph_put_cap_refs(ci, _got);
@@ -3028,6 +3087,15 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
        return 0;
 }
 
+int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff,
+                 int *got)
+{
+       struct ceph_file_info *fi = filp->private_data;
+       struct inode *inode = file_inode(filp);
+
+       return __ceph_get_caps(inode, fi, need, want, endoff, got);
+}
+
 /*
  * Take cap refs.  Caller must already know we hold at least one ref
  * on the caps in question or we don't know this is safe.
@@ -3323,6 +3391,9 @@ struct cap_extra_info {
        /* currently issued */
        int issued;
        struct timespec64 btime;
+       u8 *fscrypt_auth;
+       u32 fscrypt_auth_len;
+       u64 fscrypt_file_size;
 };
 
 /*
@@ -3355,6 +3426,14 @@ static void handle_cap_grant(struct inode *inode,
        bool deleted_inode = false;
        bool fill_inline = false;
 
+       /*
+        * If there is at least one crypto block then we'll trust
+        * fscrypt_file_size. If the real length of the file is 0, then
+        * ignore it (it has probably been truncated down to 0 by the MDS).
+        */
+       if (IS_ENCRYPTED(inode) && size)
+               size = extra_info->fscrypt_file_size;
+
        dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
             inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
        dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
@@ -3421,6 +3500,14 @@ static void handle_cap_grant(struct inode *inode,
                dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
                     from_kuid(&init_user_ns, inode->i_uid),
                     from_kgid(&init_user_ns, inode->i_gid));
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+               if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len ||
+                   memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth,
+                          ci->fscrypt_auth_len))
+                       pr_warn_ratelimited("%s: cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n",
+                               __func__, ci->fscrypt_auth_len,
+                               extra_info->fscrypt_auth_len);
+#endif
        }
 
        if ((newcaps & CEPH_CAP_LINK_SHARED) &&
@@ -3837,7 +3924,8 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
  */
 static bool handle_cap_trunc(struct inode *inode,
                             struct ceph_mds_caps *trunc,
-                            struct ceph_mds_session *session)
+                            struct ceph_mds_session *session,
+                            struct cap_extra_info *extra_info)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds = session->s_mds;
@@ -3854,8 +3942,16 @@ static bool handle_cap_trunc(struct inode *inode,
 
        issued |= implemented | dirty;
 
-       dout("handle_cap_trunc inode %p mds%d seq %d to %lld seq %d\n",
-            inode, mds, seq, truncate_size, truncate_seq);
+       /*
+        * If there is at least one crypto block then we'll trust
+        * fscrypt_file_size. If the real length of the file is 0, then
+        * ignore it (it has probably been truncated down to 0 by the MDS).
+        */
+       if (IS_ENCRYPTED(inode) && size)
+               size = extra_info->fscrypt_file_size;
+
+       dout("%s inode %p mds%d seq %d to %lld truncate seq %d\n",
+            __func__, inode, mds, seq, truncate_size, truncate_seq);
        queue_trunc = ceph_fill_file_size(inode, issued,
                                          truncate_seq, truncate_size, size);
        return queue_trunc;
@@ -4075,6 +4171,52 @@ retry:
        *target_cap = cap;
 }
 
+#ifdef CONFIG_FS_ENCRYPTION
+static int parse_fscrypt_fields(void **p, void *end,
+                               struct cap_extra_info *extra)
+{
+       u32 len;
+
+       ceph_decode_32_safe(p, end, extra->fscrypt_auth_len, bad);
+       if (extra->fscrypt_auth_len) {
+               ceph_decode_need(p, end, extra->fscrypt_auth_len, bad);
+               extra->fscrypt_auth = kmalloc(extra->fscrypt_auth_len,
+                                             GFP_KERNEL);
+               if (!extra->fscrypt_auth)
+                       return -ENOMEM;
+               ceph_decode_copy_safe(p, end, extra->fscrypt_auth,
+                                       extra->fscrypt_auth_len, bad);
+       }
+
+       ceph_decode_32_safe(p, end, len, bad);
+       if (len >= sizeof(u64)) {
+               ceph_decode_64_safe(p, end, extra->fscrypt_file_size, bad);
+               len -= sizeof(u64);
+       }
+       ceph_decode_skip_n(p, end, len, bad);
+       return 0;
+bad:
+       return -EIO;
+}
+#else
+static int parse_fscrypt_fields(void **p, void *end,
+                               struct cap_extra_info *extra)
+{
+       u32 len;
+
+       /* Don't care about these fields unless we're encryption-capable */
+       ceph_decode_32_safe(p, end, len, bad);
+       if (len)
+               ceph_decode_skip_n(p, end, len, bad);
+       ceph_decode_32_safe(p, end, len, bad);
+       if (len)
+               ceph_decode_skip_n(p, end, len, bad);
+       return 0;
+bad:
+       return -EIO;
+}
+#endif
+
 /*
  * Handle a caps message from the MDS.
  *
@@ -4105,6 +4247,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 
        dout("handle_caps from mds%d\n", session->s_mds);
 
+       if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+               return;
+
        /* decode */
        end = msg->front.iov_base + msg->front.iov_len;
        if (msg->front.iov_len < sizeof(*h))
@@ -4195,13 +4340,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad);
        }
 
+       if (msg_version >= 12) {
+               if (parse_fscrypt_fields(&p, end, &extra_info))
+                       goto bad;
+       }
+
        /* lookup ino */
        inode = ceph_find_inode(mdsc->fsc->sb, vino);
        dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
             vino.snap, inode);
 
        mutex_lock(&session->s_mutex);
-       inc_session_sequence(session);
        dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
             (unsigned)seq);
 
@@ -4292,7 +4441,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                break;
 
        case CEPH_CAP_OP_TRUNC:
-               queue_trunc = handle_cap_trunc(inode, h, session);
+               queue_trunc = handle_cap_trunc(inode, h, session,
+                                               &extra_info);
                spin_unlock(&ci->i_ceph_lock);
                if (queue_trunc)
                        ceph_queue_vmtruncate(inode);
@@ -4309,12 +4459,15 @@ done:
 done_unlocked:
        iput(inode);
 out:
+       ceph_dec_mds_stopping_blocker(mdsc);
+
        ceph_put_string(extra_info.pool_ns);
 
        /* Defer closing the sessions after s_mutex lock being released */
        if (close_sessions)
                ceph_mdsc_close_sessions(mdsc);
 
+       kfree(extra_info.fscrypt_auth);
        return;
 
 flush_cap_releases:
@@ -4611,6 +4764,18 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
        return ret;
 }
 
+/**
+ * ceph_encode_dentry_release - encode a dentry release into an outgoing request
+ * @p: outgoing request buffer
+ * @dentry: dentry to release
+ * @dir: dir to release it from
+ * @mds: mds that we're speaking to
+ * @drop: caps being dropped
+ * @unless: unless we have these caps
+ *
+ * Encode a dentry release into an outgoing request buffer. Returns 1 if the
+ * thing was released, or a negative error code otherwise.
+ */
 int ceph_encode_dentry_release(void **p, struct dentry *dentry,
                               struct inode *dir,
                               int mds, int drop, int unless)
@@ -4643,13 +4808,25 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
        if (ret && di->lease_session && di->lease_session->s_mds == mds) {
                dout("encode_dentry_release %p mds%d seq %d\n",
                     dentry, mds, (int)di->lease_seq);
-               rel->dname_len = cpu_to_le32(dentry->d_name.len);
-               memcpy(*p, dentry->d_name.name, dentry->d_name.len);
-               *p += dentry->d_name.len;
                rel->dname_seq = cpu_to_le32(di->lease_seq);
                __ceph_mdsc_drop_dentry_lease(dentry);
+               spin_unlock(&dentry->d_lock);
+               if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) {
+                       int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p);
+
+                       if (ret2 < 0)
+                               return ret2;
+
+                       rel->dname_len = cpu_to_le32(ret2);
+                       *p += ret2;
+               } else {
+                       rel->dname_len = cpu_to_le32(dentry->d_name.len);
+                       memcpy(*p, dentry->d_name.name, dentry->d_name.len);
+                       *p += dentry->d_name.len;
+               }
+       } else {
+               spin_unlock(&dentry->d_lock);
        }
-       spin_unlock(&dentry->d_lock);
        return ret;
 }
 
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c
new file mode 100644 (file)
index 0000000..e4d5cd5
--- /dev/null
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The base64 encode/decode code was copied from fscrypt:
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility
+ * Written by Uday Savagaonkar, 2014.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+#include <linux/ceph/ceph_debug.h>
+#include <linux/xattr.h>
+#include <linux/fscrypt.h>
+#include <linux/ceph/striper.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "crypto.h"
+
+/*
+ * The base64url encoding used by fscrypt includes the '_' character, which may
+ * cause problems in snapshot names (which can not start with '_').  Thus, we
+ * used the base64 encoding defined for IMAP mailbox names (RFC 3501) instead,
+ * which replaces '-' and '_' by '+' and ','.
+ */
+static const char base64_table[65] =
+       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+int ceph_base64_encode(const u8 *src, int srclen, char *dst)
+{
+       u32 ac = 0;
+       int bits = 0;
+       int i;
+       char *cp = dst;
+
+       for (i = 0; i < srclen; i++) {
+               ac = (ac << 8) | src[i];
+               bits += 8;
+               do {
+                       bits -= 6;
+                       *cp++ = base64_table[(ac >> bits) & 0x3f];
+               } while (bits >= 6);
+       }
+       if (bits)
+               *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
+       return cp - dst;
+}
+
+int ceph_base64_decode(const char *src, int srclen, u8 *dst)
+{
+       u32 ac = 0;
+       int bits = 0;
+       int i;
+       u8 *bp = dst;
+
+       for (i = 0; i < srclen; i++) {
+               const char *p = strchr(base64_table, src[i]);
+
+               if (p == NULL || src[i] == 0)
+                       return -1;
+               ac = (ac << 6) | (p - base64_table);
+               bits += 6;
+               if (bits >= 8) {
+                       bits -= 8;
+                       *bp++ = (u8)(ac >> bits);
+               }
+       }
+       if (ac & ((1 << bits) - 1))
+               return -1;
+       return bp - dst;
+}
+
+/*
+ * fscrypt ->get_context hook: copy the fscrypt context blob stored in the
+ * inode's fscrypt_auth buffer into @ctx.
+ *
+ * @inode: inode whose context is wanted
+ * @ctx:   destination buffer
+ * @len:   size of @ctx in bytes
+ *
+ * Returns the number of bytes copied, -ENOBUFS if no usable fscrypt_auth is
+ * attached to the inode (missing, too short, unknown version, or its blob
+ * length field is larger than the data actually held), or -ERANGE if @ctx is
+ * too small for the blob.
+ */
+static int ceph_crypt_get_context(struct inode *inode, void *ctx, size_t len)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_fscrypt_auth *cfa = (struct ceph_fscrypt_auth *)ci->fscrypt_auth;
+       u32 ctxlen;
+
+       /* Non existent or too short? */
+       if (!cfa || (ci->fscrypt_auth_len < (offsetof(struct ceph_fscrypt_auth, cfa_blob) + 1)))
+               return -ENOBUFS;
+
+       /* Some format we don't recognize? */
+       if (le32_to_cpu(cfa->cfa_version) != CEPH_FSCRYPT_AUTH_VERSION)
+               return -ENOBUFS;
+
+       ctxlen = le32_to_cpu(cfa->cfa_blob_len);
+
+       /*
+        * cfa_blob_len comes from the buffer itself, so make sure it does not
+        * claim more bytes than fscrypt_auth_len says we are holding;
+        * otherwise the memcpy() below would read past the allocation.
+        * The subtraction cannot underflow thanks to the length check above.
+        */
+       if (ctxlen > ci->fscrypt_auth_len -
+                    offsetof(struct ceph_fscrypt_auth, cfa_blob))
+               return -ENOBUFS;
+
+       if (len < ctxlen)
+               return -ERANGE;
+
+       memcpy(ctx, cfa->cfa_blob, ctxlen);
+       return ctxlen;
+}
+
+static int ceph_crypt_set_context(struct inode *inode, const void *ctx,
+                                 size_t len, void *fs_data)
+{
+       int ret;
+       struct iattr attr = { };
+       struct ceph_iattr cia = { };
+       struct ceph_fscrypt_auth *cfa;
+
+       WARN_ON_ONCE(fs_data);
+
+       if (len > FSCRYPT_SET_CONTEXT_MAX_SIZE)
+               return -EINVAL;
+
+       cfa = kzalloc(sizeof(*cfa), GFP_KERNEL);
+       if (!cfa)
+               return -ENOMEM;
+
+       cfa->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION);
+       cfa->cfa_blob_len = cpu_to_le32(len);
+       memcpy(cfa->cfa_blob, ctx, len);
+
+       cia.fscrypt_auth = cfa;
+
+       ret = __ceph_setattr(inode, &attr, &cia);
+       if (ret == 0)
+               inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED);
+       kfree(cia.fscrypt_auth);
+       return ret;
+}
+
+static bool ceph_crypt_empty_dir(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       return ci->i_rsubdirs + ci->i_rfiles == 1;
+}
+
+static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb)
+{
+       return ceph_sb_to_client(sb)->fsc_dummy_enc_policy.policy;
+}
+
+static struct fscrypt_operations ceph_fscrypt_ops = {
+       .get_context            = ceph_crypt_get_context,
+       .set_context            = ceph_crypt_set_context,
+       .get_dummy_policy       = ceph_get_dummy_policy,
+       .empty_dir              = ceph_crypt_empty_dir,
+};
+
+void ceph_fscrypt_set_ops(struct super_block *sb)
+{
+       fscrypt_set_ops(sb, &ceph_fscrypt_ops);
+}
+
+void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc)
+{
+       fscrypt_free_dummy_policy(&fsc->fsc_dummy_enc_policy);
+}
+
+int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode,
+                                struct ceph_acl_sec_ctx *as)
+{
+       int ret, ctxsize;
+       bool encrypted = false;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       ret = fscrypt_prepare_new_inode(dir, inode, &encrypted);
+       if (ret)
+               return ret;
+       if (!encrypted)
+               return 0;
+
+       as->fscrypt_auth = kzalloc(sizeof(*as->fscrypt_auth), GFP_KERNEL);
+       if (!as->fscrypt_auth)
+               return -ENOMEM;
+
+       ctxsize = fscrypt_context_for_new_inode(as->fscrypt_auth->cfa_blob,
+                                               inode);
+       if (ctxsize < 0)
+               return ctxsize;
+
+       as->fscrypt_auth->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION);
+       as->fscrypt_auth->cfa_blob_len = cpu_to_le32(ctxsize);
+
+       WARN_ON_ONCE(ci->fscrypt_auth);
+       kfree(ci->fscrypt_auth);
+       ci->fscrypt_auth_len = ceph_fscrypt_auth_len(as->fscrypt_auth);
+       ci->fscrypt_auth = kmemdup(as->fscrypt_auth, ci->fscrypt_auth_len,
+                                  GFP_KERNEL);
+       if (!ci->fscrypt_auth)
+               return -ENOMEM;
+
+       inode->i_flags |= S_ENCRYPTED;
+
+       return 0;
+}
+
+void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+                               struct ceph_acl_sec_ctx *as)
+{
+       swap(req->r_fscrypt_auth, as->fscrypt_auth);
+}
+
+/*
+ * User-created snapshots can't start with '_'.  Snapshots that start with this
+ * character are special (hint: these aren't real snapshots) and use the
+ * following format:
+ *
+ *   _<SNAPSHOT-NAME>_<INODE-NUMBER>
+ *
+ * where:
+ *  - <SNAPSHOT-NAME> - the real snapshot name that may need to be decrypted,
+ *  - <INODE-NUMBER> - the inode number (in decimal) for the actual snapshot
+ *
+ * This function parses these snapshot names and returns the inode
+ * <INODE-NUMBER>.  'name_len' will also be set with the <SNAPSHOT-NAME>
+ * length.
+ *
+ * 'name' is treated as a buffer of exactly '*name_len' bytes (leading '_'
+ * included); it is never read beyond that, so it does not have to be
+ * NUL-terminated.
+ *
+ * Returns the inode on success, or an ERR_PTR: -EIO if the name does not
+ * match the format above, -ENOMEM on allocation failure, the kstrtou64()
+ * error if the inode number is not a valid decimal, or the lookup error
+ * (-ENOENT if the lookup yields no inode).
+ */
+static struct inode *parse_longname(const struct inode *parent,
+                                   const char *name, int *name_len)
+{
+       struct inode *dir = NULL;
+       struct ceph_vino vino = { .snap = CEPH_NOSNAP };
+       char *inode_number;
+       const char *name_end = NULL;
+       int orig_len = *name_len;
+       int rest_len;
+       int ret;
+       int i;
+
+       /* Skip initial '_' */
+       name++;
+       rest_len = orig_len > 0 ? orig_len - 1 : 0;
+
+       /*
+        * Find the last '_' by scanning backwards inside the caller-supplied
+        * length rather than relying on a terminating NUL.
+        */
+       for (i = rest_len - 1; i >= 0; i--) {
+               if (name[i] == '_') {
+                       name_end = name + i;
+                       break;
+               }
+       }
+       if (!name_end) {
+               dout("Failed to parse long snapshot name: %.*s\n",
+                    rest_len, name);
+               return ERR_PTR(-EIO);
+       }
+       *name_len = (name_end - name);
+       if (*name_len <= 0) {
+               pr_err("Failed to parse long snapshot name\n");
+               return ERR_PTR(-EIO);
+       }
+
+       /* Get the inode number: everything after the last '_' */
+       inode_number = kmemdup_nul(name_end + 1,
+                                  rest_len - *name_len - 1,
+                                  GFP_KERNEL);
+       if (!inode_number)
+               return ERR_PTR(-ENOMEM);
+       ret = kstrtou64(inode_number, 10, &vino.ino);
+       if (ret) {
+               dout("Failed to parse inode number: %.*s\n", rest_len, name);
+               dir = ERR_PTR(ret);
+               goto out;
+       }
+
+       /* And finally the inode */
+       dir = ceph_find_inode(parent->i_sb, vino);
+       if (!dir) {
+               /* This can happen if we're not mounting cephfs on the root */
+               dir = ceph_get_inode(parent->i_sb, vino, NULL);
+               if (!dir)
+                       dir = ERR_PTR(-ENOENT);
+       }
+       if (IS_ERR(dir))
+               dout("Can't find inode %s (%.*s)\n", inode_number,
+                    rest_len, name);
+
+out:
+       kfree(inode_number);
+       return dir;
+}
+
+/*
+ * Encode the name in @d_name, as seen in directory @parent, into @buf in the
+ * form that is sent over the wire.
+ *
+ * If the directory the name belongs to has no encryption key loaded, the name
+ * is copied to @buf unchanged.  Otherwise it is encrypted, the tail is
+ * replaced by its SHA-256 digest when the ciphertext is longer than
+ * CEPH_NOHASH_NAME_MAX, and the result is base64-encoded.
+ *
+ * Long snapshot names ("_<name>_<ino>") in a snapdir are handled by
+ * encrypting only <name> with the key of inode <ino> and re-wrapping the
+ * result in the same "_..._<ino>" form.
+ *
+ * Returns the number of bytes written to @buf (no NUL terminator is
+ * appended) or a negative errno.
+ */
+int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
+                               char *buf)
+{
+       struct inode *dir = parent;
+       struct qstr iname;
+       u32 len;
+       int name_len;
+       int elen;
+       int ret;
+       u8 *cryptbuf = NULL;
+
+       iname.name = d_name->name;
+       name_len = d_name->len;
+
+       /* Handle the special case of snapshot names that start with '_' */
+       if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) &&
+           (iname.name[0] == '_')) {
+               dir = parse_longname(parent, iname.name, &name_len);
+               if (IS_ERR(dir))
+                       return PTR_ERR(dir);
+               iname.name++; /* skip initial '_' */
+       }
+       iname.len = name_len;
+
+       if (!fscrypt_has_encryption_key(dir)) {
+               memcpy(buf, d_name->name, d_name->len);
+               elen = d_name->len;
+               goto out;
+       }
+
+       /*
+        * Convert cleartext d_name to ciphertext. If result is longer than
+        * CEPH_NOHASH_NAME_MAX, sha256 the remaining bytes
+        *
+        * See: fscrypt_setup_filename
+        */
+       if (!fscrypt_fname_encrypted_size(dir, iname.len, NAME_MAX, &len)) {
+               elen = -ENAMETOOLONG;
+               goto out;
+       }
+
+       /* Allocate a buffer appropriate to hold the result */
+       cryptbuf = kmalloc(len > CEPH_NOHASH_NAME_MAX ? NAME_MAX : len,
+                          GFP_KERNEL);
+       if (!cryptbuf) {
+               elen = -ENOMEM;
+               goto out;
+       }
+
+       ret = fscrypt_fname_encrypt(dir, &iname, cryptbuf, len);
+       if (ret) {
+               elen = ret;
+               goto out;
+       }
+
+       /* hash the end if the name is long enough */
+       if (len > CEPH_NOHASH_NAME_MAX) {
+               u8 hash[SHA256_DIGEST_SIZE];
+               u8 *extra = cryptbuf + CEPH_NOHASH_NAME_MAX;
+
+               /*
+                * hash the extra bytes and overwrite crypttext beyond that
+                * point with it
+                */
+               sha256(extra, len - CEPH_NOHASH_NAME_MAX, hash);
+               memcpy(extra, hash, SHA256_DIGEST_SIZE);
+               len = CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE;
+       }
+
+       /* base64 encode the encrypted name */
+       elen = ceph_base64_encode(cryptbuf, len, buf);
+       dout("base64-encoded ciphertext name = %.*s\n", elen, buf);
+
+       /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */
+       WARN_ON(elen > 240);
+       if ((elen > 0) && (dir != parent)) {
+               /* room for NAME_MAX characters plus snprintf()'s NUL */
+               char tmp_buf[NAME_MAX + 1];
+               int tlen;
+
+               /*
+                * i_ino is unsigned long and parse_longname() reads it back
+                * with kstrtou64(), so it must be printed unsigned.
+                */
+               tlen = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%lu",
+                               elen, buf, dir->i_ino);
+               /*
+                * snprintf() returns the length the full string would have
+                * had.  Copying that many bytes after a truncation would read
+                * past tmp_buf and hand back a mangled name, so fail instead.
+                */
+               if (tlen > NAME_MAX) {
+                       elen = -ENAMETOOLONG;
+                       goto out;
+               }
+               memcpy(buf, tmp_buf, tlen);
+               elen = tlen;
+       }
+
+out:
+       kfree(cryptbuf);
+       if (dir != parent) {
+               /* drop the reference obtained by parse_longname() */
+               if ((dir->i_state & I_NEW))
+                       discard_new_inode(dir);
+               else
+                       iput(dir);
+       }
+       return elen;
+}
+
+int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry,
+                               char *buf)
+{
+       WARN_ON_ONCE(!fscrypt_has_encryption_key(parent));
+
+       return ceph_encode_encrypted_dname(parent, &dentry->d_name, buf);
+}
+
+/**
+ * ceph_fname_to_usr - convert a filename for userland presentation
+ * @fname: ceph_fname to be converted
+ * @tname: temporary name buffer to use for conversion (may be NULL)
+ * @oname: where converted name should be placed
+ * @is_nokey: set to true if key wasn't available during conversion (may be NULL)
+ *
+ * Given a filename (usually from the MDS), format it for presentation to
+ * userland. If the directory in @fname (or, for a long snapshot name, the
+ * inode it refers to) is not encrypted, just pass it back as-is.
+ *
+ * If the directory is encrypted but its key is not available, the raw name
+ * is passed back and @is_nokey is set.
+ *
+ * Otherwise, base64 decode the string (unless @fname already carries the
+ * binary crypttext), and then ask fscrypt to format it for userland
+ * presentation.
+ *
+ * Returns 0 on success or negative error code on error.
+ */
+int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
+                     struct fscrypt_str *oname, bool *is_nokey)
+{
+       struct inode *dir = fname->dir;
+       struct fscrypt_str _tname = FSTR_INIT(NULL, 0);
+       struct fscrypt_str iname;
+       char *name = fname->name;
+       int name_len = fname->name_len;
+       int ret;
+
+       /* Sanity check that the resulting name will fit in the buffer */
+       if (fname->name_len > NAME_MAX || fname->ctext_len > NAME_MAX)
+               return -EIO;
+
+       /* Handle the special case of snapshot names that start with '_' */
+       if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) &&
+           (name[0] == '_')) {
+               dir = parse_longname(dir, name, &name_len);
+               if (IS_ERR(dir))
+                       return PTR_ERR(dir);
+               name++; /* skip initial '_' */
+       }
+
+       if (!IS_ENCRYPTED(dir)) {
+               oname->name = fname->name;
+               oname->len = fname->name_len;
+               ret = 0;
+               goto out_inode;
+       }
+
+       /*
+        * ceph_fscrypt_prepare_readdir() returns 1 when this call is the one
+        * that got the directory unlocked.  That is not a failure, so only
+        * bail out on a negative value; otherwise we would hand the positive
+        * value back to the caller as if it were an error.
+        */
+       ret = ceph_fscrypt_prepare_readdir(dir);
+       if (ret < 0)
+               goto out_inode;
+
+       /*
+        * Use the raw dentry name as sent by the MDS instead of
+        * generating a nokey name via fscrypt.
+        */
+       if (!fscrypt_has_encryption_key(dir)) {
+               if (fname->no_copy)
+                       oname->name = fname->name;
+               else
+                       memcpy(oname->name, fname->name, fname->name_len);
+               oname->len = fname->name_len;
+               if (is_nokey)
+                       *is_nokey = true;
+               ret = 0;
+               goto out_inode;
+       }
+
+       if (fname->ctext_len == 0) {
+               int declen;
+
+               /* No scratch buffer supplied: allocate one, freed at "out" */
+               if (!tname) {
+                       ret = fscrypt_fname_alloc_buffer(NAME_MAX, &_tname);
+                       if (ret)
+                               goto out_inode;
+                       tname = &_tname;
+               }
+
+               declen = ceph_base64_decode(name, name_len, tname->name);
+               if (declen <= 0) {
+                       ret = -EIO;
+                       goto out;
+               }
+               iname.name = tname->name;
+               iname.len = declen;
+       } else {
+               iname.name = fname->ctext;
+               iname.len = fname->ctext_len;
+       }
+
+       ret = fscrypt_fname_disk_to_usr(dir, 0, 0, &iname, oname);
+       if (!ret && (dir != fname->dir)) {
+               char tmp_buf[CEPH_BASE64_CHARS(NAME_MAX)];
+
+               /*
+                * Re-wrap the decrypted snapshot name as "_<name>_<ino>".
+                * i_ino is unsigned long, so print it unsigned.
+                */
+               name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%lu",
+                                   oname->len, oname->name, dir->i_ino);
+               memcpy(oname->name, tmp_buf, name_len);
+               oname->len = name_len;
+       }
+
+out:
+       fscrypt_fname_free_buffer(&_tname);
+out_inode:
+       /* drop the reference obtained by parse_longname(), if any */
+       if ((dir != fname->dir) && !IS_ERR(dir)) {
+               if ((dir->i_state & I_NEW))
+                       discard_new_inode(dir);
+               else
+                       iput(dir);
+       }
+       return ret;
+}
+
+/**
+ * ceph_fscrypt_prepare_readdir - simple __fscrypt_prepare_readdir() wrapper
+ * @dir: directory inode for readdir prep
+ *
+ * Simple wrapper around __fscrypt_prepare_readdir() that will mark directory as
+ * non-complete if this call results in having the directory unlocked.
+ *
+ * Returns:
+ *     1 - if directory was locked and key is now loaded (i.e. dir is unlocked)
+ *     0 - if directory is still locked
+ *   < 0 - if __fscrypt_prepare_readdir() fails
+ */
+int ceph_fscrypt_prepare_readdir(struct inode *dir)
+{
+       bool had_key = fscrypt_has_encryption_key(dir);
+       int err;
+
+       if (!IS_ENCRYPTED(dir))
+               return 0;
+
+       err = __fscrypt_prepare_readdir(dir);
+       if (err)
+               return err;
+       if (!had_key && fscrypt_has_encryption_key(dir)) {
+               /* directory just got unlocked, mark it as not complete */
+               ceph_dir_clear_complete(dir);
+               return 1;
+       }
+       return 0;
+}
+
+int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+                                 struct page *page, unsigned int len,
+                                 unsigned int offs, u64 lblk_num)
+{
+       dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num);
+       return fscrypt_decrypt_block_inplace(inode, page, len, offs, lblk_num);
+}
+
+int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+                                 struct page *page, unsigned int len,
+                                 unsigned int offs, u64 lblk_num,
+                                 gfp_t gfp_flags)
+{
+       dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num);
+       return fscrypt_encrypt_block_inplace(inode, page, len, offs, lblk_num,
+                                            gfp_flags);
+}
+
+/**
+ * ceph_fscrypt_decrypt_pages - decrypt an array of pages
+ * @inode: pointer to inode associated with these pages
+ * @page: pointer to page array
+ * @off: offset into the file that the read data starts
+ * @len: max length to decrypt
+ *
+ * Decrypt an array of fscrypt'ed pages and return the amount of
+ * data decrypted. Any data in the page prior to the start of the
+ * first complete block in the read is ignored. Any incomplete
+ * crypto blocks at the end of the array are ignored (and should
+ * probably be zeroed by the caller).
+ *
+ * Returns the length of the decrypted data or a negative errno.
+ */
+int ceph_fscrypt_decrypt_pages(struct inode *inode, struct page **page,
+                              u64 off, int len)
+{
+       int i, num_blocks;
+       u64 baseblk = off >> CEPH_FSCRYPT_BLOCK_SHIFT;
+       int ret = 0;
+
+       /*
+        * We can't deal with partial blocks on an encrypted file, so mask off
+        * the last bit.
+        */
+       num_blocks = ceph_fscrypt_blocks(off, len & CEPH_FSCRYPT_BLOCK_MASK);
+
+       /* Decrypt each block */
+       for (i = 0; i < num_blocks; ++i) {
+               int blkoff = i << CEPH_FSCRYPT_BLOCK_SHIFT;
+               int pgidx = blkoff >> PAGE_SHIFT;
+               unsigned int pgoffs = offset_in_page(blkoff);
+               int fret;
+
+               fret = ceph_fscrypt_decrypt_block_inplace(inode, page[pgidx],
+                               CEPH_FSCRYPT_BLOCK_SIZE, pgoffs,
+                               baseblk + i);
+               if (fret < 0) {
+                       if (ret == 0)
+                               ret = fret;
+                       break;
+               }
+               ret += CEPH_FSCRYPT_BLOCK_SIZE;
+       }
+       return ret;
+}
+
+/**
+ * ceph_fscrypt_decrypt_extents: decrypt received extents in given buffer
+ * @inode: inode associated with pages being decrypted
+ * @page: pointer to page array
+ * @off: offset into the file that the data in page[0] starts
+ * @map: pointer to extent array
+ * @ext_cnt: length of extent array
+ *
+ * Given an extent map and a page array, decrypt the received data in-place,
+ * skipping holes. Returns the offset into buffer of end of last decrypted
+ * block.
+ */
+int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page,
+                                u64 off, struct ceph_sparse_extent *map,
+                                u32 ext_cnt)
+{
+       int i, ret = 0;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       u64 objno, objoff;
+       u32 xlen;
+
+       /* Nothing to do for empty array */
+       if (ext_cnt == 0) {
+               dout("%s: empty array, ret 0\n", __func__);
+               return 0;
+       }
+
+       ceph_calc_file_object_mapping(&ci->i_layout, off, map[0].len,
+                                     &objno, &objoff, &xlen);
+
+       for (i = 0; i < ext_cnt; ++i) {
+               struct ceph_sparse_extent *ext = &map[i];
+               int pgsoff = ext->off - objoff;
+               int pgidx = pgsoff >> PAGE_SHIFT;
+               int fret;
+
+               if ((ext->off | ext->len) & ~CEPH_FSCRYPT_BLOCK_MASK) {
+                       pr_warn("%s: bad encrypted sparse extent idx %d off %llx len %llx\n",
+                               __func__, i, ext->off, ext->len);
+                       return -EIO;
+               }
+               fret = ceph_fscrypt_decrypt_pages(inode, &page[pgidx],
+                                                off + pgsoff, ext->len);
+               dout("%s: [%d] 0x%llx~0x%llx fret %d\n", __func__, i,
+                               ext->off, ext->len, fret);
+               if (fret < 0) {
+                       if (ret == 0)
+                               ret = fret;
+                       break;
+               }
+               ret = pgsoff + fret;
+       }
+       dout("%s: ret %d\n", __func__, ret);
+       return ret;
+}
+
+/**
+ * ceph_fscrypt_encrypt_pages - encrypt an array of pages
+ * @inode: pointer to inode associated with these pages
+ * @page: pointer to page array
+ * @off: offset into the file that the data starts
+ * @len: max length to encrypt
+ * @gfp: gfp flags to use for allocation
+ *
+ * Encrypt an array of cleartext pages and return the amount of
+ * data encrypted. Any data in the page prior to the start of the
+ * first complete block in the read is ignored. Any incomplete
+ * crypto blocks at the end of the array are ignored.
+ *
+ * Returns the length of the encrypted data or a negative errno.
+ */
+int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off,
+                               int len, gfp_t gfp)
+{
+       int i, num_blocks;
+       u64 baseblk = off >> CEPH_FSCRYPT_BLOCK_SHIFT;
+       int ret = 0;
+
+       /*
+        * We can't deal with partial blocks on an encrypted file, so mask off
+        * the last bit.
+        */
+       num_blocks = ceph_fscrypt_blocks(off, len & CEPH_FSCRYPT_BLOCK_MASK);
+
+       /* Encrypt each block */
+       for (i = 0; i < num_blocks; ++i) {
+               int blkoff = i << CEPH_FSCRYPT_BLOCK_SHIFT;
+               int pgidx = blkoff >> PAGE_SHIFT;
+               unsigned int pgoffs = offset_in_page(blkoff);
+               int fret;
+
+               fret = ceph_fscrypt_encrypt_block_inplace(inode, page[pgidx],
+                               CEPH_FSCRYPT_BLOCK_SIZE, pgoffs,
+                               baseblk + i, gfp);
+               if (fret < 0) {
+                       if (ret == 0)
+                               ret = fret;
+                       break;
+               }
+               ret += CEPH_FSCRYPT_BLOCK_SIZE;
+       }
+       return ret;
+}
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h
new file mode 100644 (file)
index 0000000..47e0c31
--- /dev/null
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ceph fscrypt functionality
+ */
+
+#ifndef _CEPH_CRYPTO_H
+#define _CEPH_CRYPTO_H
+
+#include <crypto/sha2.h>
+#include <linux/fscrypt.h>
+
+#define CEPH_FSCRYPT_BLOCK_SHIFT   12
+#define CEPH_FSCRYPT_BLOCK_SIZE    (_AC(1, UL) << CEPH_FSCRYPT_BLOCK_SHIFT)
+#define CEPH_FSCRYPT_BLOCK_MASK           (~(CEPH_FSCRYPT_BLOCK_SIZE-1))
+
+struct ceph_fs_client;
+struct ceph_acl_sec_ctx;
+struct ceph_mds_request;
+
+struct ceph_fname {
+       struct inode    *dir;
+       char            *name;          // b64 encoded, possibly hashed
+       unsigned char   *ctext;         // binary crypttext (if any)
+       u32             name_len;       // length of name buffer
+       u32             ctext_len;      // length of crypttext
+       bool            no_copy;
+};
+
+/*
+ * Header sent to the MDS when truncating the size of an encrypted
+ * file. The MDS uses it to update the encrypted last block and
+ * then truncate the size.
+ */
+struct ceph_fscrypt_truncate_size_header {
+       __u8  ver;
+       __u8  compat;
+
+       /*
+        * It will be sizeof(change_attr + file_offset + block_size)
+        * if the last block is empty because it lies in a file hole.
+        * Otherwise data_len also includes CEPH_FSCRYPT_BLOCK_SIZE.
+        */
+       __le32 data_len;
+
+       __le64 change_attr;
+       __le64 file_offset;
+       __le32 block_size;
+} __packed;
+
+struct ceph_fscrypt_auth {
+       __le32  cfa_version;
+       __le32  cfa_blob_len;
+       u8      cfa_blob[FSCRYPT_SET_CONTEXT_MAX_SIZE];
+} __packed;
+
+#define CEPH_FSCRYPT_AUTH_VERSION      1
+static inline u32 ceph_fscrypt_auth_len(struct ceph_fscrypt_auth *fa)
+{
+       u32 ctxsize = le32_to_cpu(fa->cfa_blob_len);
+
+       return offsetof(struct ceph_fscrypt_auth, cfa_blob) + ctxsize;
+}
+
+#ifdef CONFIG_FS_ENCRYPTION
+/*
+ * We want to encrypt filenames when creating them, but the encrypted
+ * versions of those names may have illegal characters in them. To mitigate
+ * that, we base64 encode them, but that gives us a result that can exceed
+ * NAME_MAX.
+ *
+ * Follow a similar scheme to fscrypt itself, and cap the filename to a
+ * smaller size. If the ciphertext name is longer than the value below, then
+ * sha256 hash the remaining bytes.
+ *
+ * For the fscrypt_nokey_name struct the dirhash[2] member is useless in ceph
+ * so the corresponding struct will be:
+ *
+ * struct fscrypt_ceph_nokey_name {
+ *     u8 bytes[148];
+ *     u8 sha256[SHA256_DIGEST_SIZE];
+ * }; // 180 bytes => 240 bytes base64-encoded, which is <= NAME_MAX (255)
+ *
+ * (240 bytes is the maximum size allowed for snapshot names to take into
+ *  account the format: '_<SNAPSHOT-NAME>_<INODE-NUMBER>'.)
+ *
+ * Note that for long names that end up having their tail portion hashed, we
+ * must also store the full encrypted name (in the dentry's alternate_name
+ * field).
+ */
+#define CEPH_NOHASH_NAME_MAX (180 - SHA256_DIGEST_SIZE)
+
+#define CEPH_BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
+
+int ceph_base64_encode(const u8 *src, int srclen, char *dst);
+int ceph_base64_decode(const char *src, int srclen, u8 *dst);
+
+void ceph_fscrypt_set_ops(struct super_block *sb);
+
+void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc);
+
+int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode,
+                                struct ceph_acl_sec_ctx *as);
+void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+                               struct ceph_acl_sec_ctx *as);
+int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
+                               char *buf);
+int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry,
+                               char *buf);
+
+static inline int ceph_fname_alloc_buffer(struct inode *parent,
+                                         struct fscrypt_str *fname)
+{
+       if (!IS_ENCRYPTED(parent))
+               return 0;
+       return fscrypt_fname_alloc_buffer(NAME_MAX, fname);
+}
+
+static inline void ceph_fname_free_buffer(struct inode *parent,
+                                         struct fscrypt_str *fname)
+{
+       if (IS_ENCRYPTED(parent))
+               fscrypt_fname_free_buffer(fname);
+}
+
+int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
+                     struct fscrypt_str *oname, bool *is_nokey);
+int ceph_fscrypt_prepare_readdir(struct inode *dir);
+
+static inline unsigned int ceph_fscrypt_blocks(u64 off, u64 len)
+{
+       /* crypto blocks cannot span more than one page */
+       BUILD_BUG_ON(CEPH_FSCRYPT_BLOCK_SHIFT > PAGE_SHIFT);
+
+       return ((off+len+CEPH_FSCRYPT_BLOCK_SIZE-1) >> CEPH_FSCRYPT_BLOCK_SHIFT) -
+               (off >> CEPH_FSCRYPT_BLOCK_SHIFT);
+}
+
+/*
+ * If we have an encrypted inode then we must adjust the offset and
+ * range of the on-the-wire read to cover an entire encryption block.
+ * The copy will be done using the original offset and length, after
+ * we've decrypted the result.
+ */
+static inline void ceph_fscrypt_adjust_off_and_len(struct inode *inode,
+                                                  u64 *off, u64 *len)
+{
+       if (IS_ENCRYPTED(inode)) {
+               *len = ceph_fscrypt_blocks(*off, *len) * CEPH_FSCRYPT_BLOCK_SIZE;
+               *off &= CEPH_FSCRYPT_BLOCK_MASK;
+       }
+}
+
+int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+                                 struct page *page, unsigned int len,
+                                 unsigned int offs, u64 lblk_num);
+int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+                                 struct page *page, unsigned int len,
+                                 unsigned int offs, u64 lblk_num,
+                                 gfp_t gfp_flags);
+int ceph_fscrypt_decrypt_pages(struct inode *inode, struct page **page,
+                              u64 off, int len);
+int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page,
+                                u64 off, struct ceph_sparse_extent *map,
+                                u32 ext_cnt);
+int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off,
+                              int len, gfp_t gfp);
+
+static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
+{
+       return fscrypt_is_bounce_page(page) ? fscrypt_pagecache_page(page) : page;
+}
+
+#else /* CONFIG_FS_ENCRYPTION */
+
+static inline void ceph_fscrypt_set_ops(struct super_block *sb)
+{
+}
+
+static inline void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc)
+{
+}
+
+static inline int ceph_fscrypt_prepare_context(struct inode *dir,
+                                              struct inode *inode,
+                                              struct ceph_acl_sec_ctx *as)
+{
+       if (IS_ENCRYPTED(dir))
+               return -EOPNOTSUPP;
+       return 0;
+}
+
+static inline void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+                                               struct ceph_acl_sec_ctx *as_ctx)
+{
+}
+
+static inline int ceph_encode_encrypted_dname(struct inode *parent,
+                                             struct qstr *d_name, char *buf)
+{
+       memcpy(buf, d_name->name, d_name->len);
+       return d_name->len;
+}
+
+static inline int ceph_encode_encrypted_fname(struct inode *parent,
+                                             struct dentry *dentry, char *buf)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int ceph_fname_alloc_buffer(struct inode *parent,
+                                         struct fscrypt_str *fname)
+{
+       return 0;
+}
+
+static inline void ceph_fname_free_buffer(struct inode *parent,
+                                         struct fscrypt_str *fname)
+{
+}
+
+static inline int ceph_fname_to_usr(const struct ceph_fname *fname,
+                                   struct fscrypt_str *tname,
+                                   struct fscrypt_str *oname, bool *is_nokey)
+{
+       oname->name = fname->name;
+       oname->len = fname->name_len;
+       return 0;
+}
+
+static inline int ceph_fscrypt_prepare_readdir(struct inode *dir)
+{
+       return 0;
+}
+
+static inline void ceph_fscrypt_adjust_off_and_len(struct inode *inode,
+                                                  u64 *off, u64 *len)
+{
+}
+
+static inline int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+                                         struct page *page, unsigned int len,
+                                         unsigned int offs, u64 lblk_num)
+{
+       return 0;
+}
+
+static inline int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+                                         struct page *page, unsigned int len,
+                                         unsigned int offs, u64 lblk_num,
+                                         gfp_t gfp_flags)
+{
+       return 0;
+}
+
+static inline int ceph_fscrypt_decrypt_pages(struct inode *inode,
+                                            struct page **page, u64 off,
+                                            int len)
+{
+       return 0;
+}
+
+static inline int ceph_fscrypt_decrypt_extents(struct inode *inode,
+                                              struct page **page, u64 off,
+                                              struct ceph_sparse_extent *map,
+                                              u32 ext_cnt)
+{
+       return 0;
+}
+
+static inline int ceph_fscrypt_encrypt_pages(struct inode *inode,
+                                            struct page **page, u64 off,
+                                            int len, gfp_t gfp)
+{
+       return 0;
+}
+
+static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
+{
+       return page;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
+static inline loff_t ceph_fscrypt_page_offset(struct page *page)
+{
+       return page_offset(ceph_fscrypt_pagecache_page(page));
+}
+
+#endif /* _CEPH_CRYPTO_H */
index bdcffb0..854cbdd 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "crypto.h"
 
 /*
  * Directory operations: readdir, lookup, create, link, unlink,
@@ -241,7 +242,9 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
                di = ceph_dentry(dentry);
                if (d_unhashed(dentry) ||
                    d_really_is_negative(dentry) ||
-                   di->lease_shared_gen != shared_gen) {
+                   di->lease_shared_gen != shared_gen ||
+                   ((dentry->d_flags & DCACHE_NOKEY_NAME) &&
+                    fscrypt_has_encryption_key(dir))) {
                        spin_unlock(&dentry->d_lock);
                        dput(dentry);
                        err = -EAGAIN;
@@ -340,6 +343,10 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
                ctx->pos = 2;
        }
 
+       err = ceph_fscrypt_prepare_readdir(inode);
+       if (err < 0)
+               return err;
+
        spin_lock(&ci->i_ceph_lock);
        /* request Fx cap. if have Fx, we don't need to release Fs cap
         * for later create/unlink. */
@@ -389,6 +396,7 @@ more:
                req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
                if (IS_ERR(req))
                        return PTR_ERR(req);
+
                err = ceph_alloc_readdir_reply_buffer(req, inode);
                if (err) {
                        ceph_mdsc_put_request(req);
@@ -402,11 +410,21 @@ more:
                        req->r_inode_drop = CEPH_CAP_FILE_EXCL;
                }
                if (dfi->last_name) {
-                       req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
+                       struct qstr d_name = { .name = dfi->last_name,
+                                              .len = strlen(dfi->last_name) };
+
+                       req->r_path2 = kzalloc(NAME_MAX + 1, GFP_KERNEL);
                        if (!req->r_path2) {
                                ceph_mdsc_put_request(req);
                                return -ENOMEM;
                        }
+
+                       err = ceph_encode_encrypted_dname(inode, &d_name,
+                                                         req->r_path2);
+                       if (err < 0) {
+                               ceph_mdsc_put_request(req);
+                               return err;
+                       }
                } else if (is_hash_order(ctx->pos)) {
                        req->r_args.readdir.offset_hash =
                                cpu_to_le32(fpos_hash(ctx->pos));
@@ -511,15 +529,20 @@ more:
        for (; i < rinfo->dir_nr; i++) {
                struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
 
-               BUG_ON(rde->offset < ctx->pos);
+               if (rde->offset < ctx->pos) {
+                       pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n",
+                               __func__, rde->offset, ctx->pos);
+                       return -EIO;
+               }
+
+               if (WARN_ON_ONCE(!rde->inode.in))
+                       return -EIO;
 
                ctx->pos = rde->offset;
                dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
                     i, rinfo->dir_nr, ctx->pos,
                     rde->name_len, rde->name, &rde->inode.in);
 
-               BUG_ON(!rde->inode.in);
-
                if (!dir_emit(ctx, rde->name, rde->name_len,
                              ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
                              le32_to_cpu(rde->inode.in->mode) >> 12)) {
@@ -532,6 +555,8 @@ more:
                        dout("filldir stopping us...\n");
                        return 0;
                }
+
+               /* Reset the lengths to their original allocated vals */
                ctx->pos++;
        }
 
@@ -586,7 +611,6 @@ more:
                                        dfi->dir_ordered_count);
                spin_unlock(&ci->i_ceph_lock);
        }
-
        dout("readdir %p file %p done.\n", inode, file);
        return 0;
 }
@@ -760,6 +784,18 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
        if (dentry->d_name.len > NAME_MAX)
                return ERR_PTR(-ENAMETOOLONG);
 
+       if (IS_ENCRYPTED(dir)) {
+               bool had_key = fscrypt_has_encryption_key(dir);
+
+               err = fscrypt_prepare_lookup_partial(dir, dentry);
+               if (err < 0)
+                       return ERR_PTR(err);
+
+               /* mark directory as incomplete if it has been unlocked */
+               if (!had_key && fscrypt_has_encryption_key(dir))
+                       ceph_dir_clear_complete(dir);
+       }
+
        /* can we conclude ENOENT locally? */
        if (d_really_is_negative(dentry)) {
                struct ceph_inode_info *ci = ceph_inode(dir);
@@ -865,13 +901,6 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
                goto out;
        }
 
-       err = ceph_pre_init_acls(dir, &mode, &as_ctx);
-       if (err < 0)
-               goto out;
-       err = ceph_security_init_secctx(dentry, mode, &as_ctx);
-       if (err < 0)
-               goto out;
-
        dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
             dir, dentry, mode, rdev);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
@@ -879,6 +908,17 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
                err = PTR_ERR(req);
                goto out;
        }
+
+       req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+       if (IS_ERR(req->r_new_inode)) {
+               err = PTR_ERR(req->r_new_inode);
+               req->r_new_inode = NULL;
+               goto out_req;
+       }
+
+       if (S_ISREG(mode) && IS_ENCRYPTED(dir))
+               set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        req->r_parent = dir;
@@ -889,13 +929,13 @@ static int ceph_mknod(struct mnt_idmap *idmap, struct inode *dir,
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
                             CEPH_CAP_XATTR_EXCL;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-       if (as_ctx.pagelist) {
-               req->r_pagelist = as_ctx.pagelist;
-               as_ctx.pagelist = NULL;
-       }
+
+       ceph_as_ctx_to_req(req, &as_ctx);
+
        err = ceph_mdsc_do_request(mdsc, dir, req);
        if (!err && !req->r_reply_info.head->is_dentry)
                err = ceph_handle_notrace_create(dir, dentry);
+out_req:
        ceph_mdsc_put_request(req);
 out:
        if (!err)
@@ -912,12 +952,50 @@ static int ceph_create(struct mnt_idmap *idmap, struct inode *dir,
        return ceph_mknod(idmap, dir, dentry, mode, 0);
 }
 
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
+                                        const char *dest)
+{
+       int err;
+       int len = strlen(dest);
+       struct fscrypt_str osd_link = FSTR_INIT(NULL, 0);
+
+       err = fscrypt_prepare_symlink(req->r_parent, dest, len, PATH_MAX,
+                                     &osd_link);
+       if (err)
+               goto out;
+
+       err = fscrypt_encrypt_symlink(req->r_new_inode, dest, len, &osd_link);
+       if (err)
+               goto out;
+
+       req->r_path2 = kmalloc(CEPH_BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL);
+       if (!req->r_path2) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       len = ceph_base64_encode(osd_link.name, osd_link.len, req->r_path2);
+       req->r_path2[len] = '\0';
+out:
+       fscrypt_fname_free_buffer(&osd_link);
+       return err;
+}
+#else
+static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
+                                        const char *dest)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
                        struct dentry *dentry, const char *dest)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
        struct ceph_mds_request *req;
        struct ceph_acl_sec_ctx as_ctx = {};
+       umode_t mode = S_IFLNK | 0777;
        int err;
 
        if (ceph_snap(dir) != CEPH_NOSNAP)
@@ -932,38 +1010,48 @@ static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
                goto out;
        }
 
-       err = ceph_security_init_secctx(dentry, S_IFLNK | 0777, &as_ctx);
-       if (err < 0)
-               goto out;
-
        dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
        if (IS_ERR(req)) {
                err = PTR_ERR(req);
                goto out;
        }
-       req->r_path2 = kstrdup(dest, GFP_KERNEL);
-       if (!req->r_path2) {
-               err = -ENOMEM;
-               ceph_mdsc_put_request(req);
-               goto out;
+
+       req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+       if (IS_ERR(req->r_new_inode)) {
+               err = PTR_ERR(req->r_new_inode);
+               req->r_new_inode = NULL;
+               goto out_req;
        }
+
        req->r_parent = dir;
        ihold(dir);
 
+       if (IS_ENCRYPTED(req->r_new_inode)) {
+               err = prep_encrypted_symlink_target(req, dest);
+               if (err)
+                       goto out_req;
+       } else {
+               req->r_path2 = kstrdup(dest, GFP_KERNEL);
+               if (!req->r_path2) {
+                       err = -ENOMEM;
+                       goto out_req;
+               }
+       }
+
        set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
                             CEPH_CAP_XATTR_EXCL;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-       if (as_ctx.pagelist) {
-               req->r_pagelist = as_ctx.pagelist;
-               as_ctx.pagelist = NULL;
-       }
+
+       ceph_as_ctx_to_req(req, &as_ctx);
+
        err = ceph_mdsc_do_request(mdsc, dir, req);
        if (!err && !req->r_reply_info.head->is_dentry)
                err = ceph_handle_notrace_create(dir, dentry);
+out_req:
        ceph_mdsc_put_request(req);
 out:
        if (err)
@@ -1003,14 +1091,12 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
                err = -EDQUOT;
                goto out;
        }
-
-       mode |= S_IFDIR;
-       err = ceph_pre_init_acls(dir, &mode, &as_ctx);
-       if (err < 0)
-               goto out;
-       err = ceph_security_init_secctx(dentry, mode, &as_ctx);
-       if (err < 0)
+       if ((op == CEPH_MDS_OP_MKSNAP) && IS_ENCRYPTED(dir) &&
+           !fscrypt_has_encryption_key(dir)) {
+               err = -ENOKEY;
                goto out;
+       }
+
 
        req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
        if (IS_ERR(req)) {
@@ -1018,6 +1104,14 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
                goto out;
        }
 
+       mode |= S_IFDIR;
+       req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+       if (IS_ERR(req->r_new_inode)) {
+               err = PTR_ERR(req->r_new_inode);
+               req->r_new_inode = NULL;
+               goto out_req;
+       }
+
        req->r_dentry = dget(dentry);
        req->r_num_caps = 2;
        req->r_parent = dir;
@@ -1027,15 +1121,15 @@ static int ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
                             CEPH_CAP_XATTR_EXCL;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-       if (as_ctx.pagelist) {
-               req->r_pagelist = as_ctx.pagelist;
-               as_ctx.pagelist = NULL;
-       }
+
+       ceph_as_ctx_to_req(req, &as_ctx);
+
        err = ceph_mdsc_do_request(mdsc, dir, req);
        if (!err &&
            !req->r_reply_info.head->is_target &&
            !req->r_reply_info.head->is_dentry)
                err = ceph_handle_notrace_create(dir, dentry);
+out_req:
        ceph_mdsc_put_request(req);
 out:
        if (!err)
@@ -1063,6 +1157,10 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
        if (ceph_snap(dir) != CEPH_NOSNAP)
                return -EROFS;
 
+       err = fscrypt_prepare_link(old_dentry, dir, dentry);
+       if (err)
+               return err;
+
        dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
             dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
@@ -1310,6 +1408,11 @@ static int ceph_rename(struct mnt_idmap *idmap, struct inode *old_dir,
        if (err)
                return err;
 
+       err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
+                                    flags);
+       if (err)
+               return err;
+
        dout("rename dir %p dentry %p to dir %p dentry %p\n",
             old_dir, old_dentry, new_dir, new_dentry);
        req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1765,6 +1868,10 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
        struct inode *dir, *inode;
        struct ceph_mds_client *mdsc;
 
+       valid = fscrypt_d_revalidate(dentry, flags);
+       if (valid <= 0)
+               return valid;
+
        if (flags & LOOKUP_RCU) {
                parent = READ_ONCE(dentry->d_parent);
                dir = d_inode_rcu(parent);
@@ -1777,8 +1884,9 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
                inode = d_inode(dentry);
        }
 
-       dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry,
-            dentry, inode, ceph_dentry(dentry)->offset);
+       dout("d_revalidate %p '%pd' inode %p offset 0x%llx nokey %d\n", dentry,
+            dentry, inode, ceph_dentry(dentry)->offset,
+            !!(dentry->d_flags & DCACHE_NOKEY_NAME));
 
        mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
 
index f780e4e..8559990 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "crypto.h"
 
 /*
  * Basic fh
@@ -535,7 +536,9 @@ static int ceph_get_name(struct dentry *parent, char *name,
 {
        struct ceph_mds_client *mdsc;
        struct ceph_mds_request *req;
+       struct inode *dir = d_inode(parent);
        struct inode *inode = d_inode(child);
+       struct ceph_mds_reply_info_parsed *rinfo;
        int err;
 
        if (ceph_snap(inode) != CEPH_NOSNAP)
@@ -547,30 +550,47 @@ static int ceph_get_name(struct dentry *parent, char *name,
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       inode_lock(d_inode(parent));
-
+       inode_lock(dir);
        req->r_inode = inode;
        ihold(inode);
        req->r_ino2 = ceph_vino(d_inode(parent));
-       req->r_parent = d_inode(parent);
-       ihold(req->r_parent);
+       req->r_parent = dir;
+       ihold(dir);
        set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
+       inode_unlock(dir);
 
-       inode_unlock(d_inode(parent));
+       if (err)
+               goto out;
 
-       if (!err) {
-               struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+       rinfo = &req->r_reply_info;
+       if (!IS_ENCRYPTED(dir)) {
                memcpy(name, rinfo->dname, rinfo->dname_len);
                name[rinfo->dname_len] = 0;
-               dout("get_name %p ino %llx.%llx name %s\n",
-                    child, ceph_vinop(inode), name);
        } else {
-               dout("get_name %p ino %llx.%llx err %d\n",
-                    child, ceph_vinop(inode), err);
-       }
+               struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+               struct ceph_fname fname = { .dir        = dir,
+                                           .name       = rinfo->dname,
+                                           .ctext      = rinfo->altname,
+                                           .name_len   = rinfo->dname_len,
+                                           .ctext_len  = rinfo->altname_len };
+
+               err = ceph_fname_alloc_buffer(dir, &oname);
+               if (err < 0)
+                       goto out;
 
+               err = ceph_fname_to_usr(&fname, NULL, &oname, NULL);
+               if (!err) {
+                       memcpy(name, oname.name, oname.len);
+                       name[oname.len] = 0;
+               }
+               ceph_fname_free_buffer(dir, &oname);
+       }
+out:
+       dout("get_name %p ino %llx.%llx err %d %s%s\n",
+                    child, ceph_vinop(inode), err,
+                    err ? "" : "name ", err ? "" : name);
        ceph_mdsc_put_request(req);
        return err;
 }
index 63efe53..b1da02f 100644 (file)
@@ -366,8 +366,13 @@ int ceph_open(struct inode *inode, struct file *file)
 
        /* filter out O_CREAT|O_EXCL; vfs did that already.  yuck. */
        flags = file->f_flags & ~(O_CREAT|O_EXCL);
-       if (S_ISDIR(inode->i_mode))
+       if (S_ISDIR(inode->i_mode)) {
                flags = O_DIRECTORY;  /* mds likes to know */
+       } else if (S_ISREG(inode->i_mode)) {
+               err = fscrypt_file_open(inode, file);
+               if (err)
+                       return err;
+       }
 
        dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode,
             ceph_vinop(inode), file, flags, file->f_flags);
@@ -604,7 +609,8 @@ out:
        ceph_mdsc_release_dir_caps(req);
 }
 
-static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
+static int ceph_finish_async_create(struct inode *dir, struct inode *inode,
+                                   struct dentry *dentry,
                                    struct file *file, umode_t mode,
                                    struct ceph_mds_request *req,
                                    struct ceph_acl_sec_ctx *as_ctx,
@@ -616,7 +622,6 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
        struct ceph_mds_reply_info_in iinfo = { .in = &in };
        struct ceph_inode_info *ci = ceph_inode(dir);
        struct ceph_dentry_info *di = ceph_dentry(dentry);
-       struct inode *inode;
        struct timespec64 now;
        struct ceph_string *pool_ns;
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
@@ -625,10 +630,6 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
 
        ktime_get_real_ts64(&now);
 
-       inode = ceph_get_inode(dentry->d_sb, vino);
-       if (IS_ERR(inode))
-               return PTR_ERR(inode);
-
        iinfo.inline_version = CEPH_INLINE_NONE;
        iinfo.change_attr = 1;
        ceph_encode_timespec64(&iinfo.btime, &now);
@@ -686,8 +687,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
                ceph_dir_clear_complete(dir);
                if (!d_unhashed(dentry))
                        d_drop(dentry);
-               if (inode->i_state & I_NEW)
-                       discard_new_inode(inode);
+               discard_new_inode(inode);
        } else {
                struct dentry *dn;
 
@@ -733,6 +733,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
        struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
+       struct inode *new_inode = NULL;
        struct dentry *dn;
        struct ceph_acl_sec_ctx as_ctx = {};
        bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
@@ -755,15 +756,16 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
         */
        flags &= ~O_TRUNC;
 
+retry:
        if (flags & O_CREAT) {
                if (ceph_quota_is_max_files_exceeded(dir))
                        return -EDQUOT;
-               err = ceph_pre_init_acls(dir, &mode, &as_ctx);
-               if (err < 0)
-                       return err;
-               err = ceph_security_init_secctx(dentry, mode, &as_ctx);
-               if (err < 0)
+
+               new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+               if (IS_ERR(new_inode)) {
+                       err = PTR_ERR(new_inode);
                        goto out_ctx;
+               }
                /* Async create can't handle more than a page of xattrs */
                if (as_ctx.pagelist &&
                    !list_is_singular(&as_ctx.pagelist->head))
@@ -772,7 +774,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                /* If it's not being looked up, it's negative */
                return -ENOENT;
        }
-retry:
+
        /* do the open */
        req = prepare_open_request(dir->i_sb, flags, mode);
        if (IS_ERR(req)) {
@@ -787,6 +789,12 @@ retry:
        req->r_args.open.mask = cpu_to_le32(mask);
        req->r_parent = dir;
        ihold(dir);
+       if (IS_ENCRYPTED(dir)) {
+               set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+               err = fscrypt_prepare_lookup_partial(dir, dentry);
+               if (err < 0)
+                       goto out_req;
+       }
 
        if (flags & O_CREAT) {
                struct ceph_file_layout lo;
@@ -794,32 +802,47 @@ retry:
                req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
                                     CEPH_CAP_XATTR_EXCL;
                req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
-               if (as_ctx.pagelist) {
-                       req->r_pagelist = as_ctx.pagelist;
-                       as_ctx.pagelist = NULL;
-               }
-               if (try_async &&
-                   (req->r_dir_caps =
-                     try_prep_async_create(dir, dentry, &lo,
-                                           &req->r_deleg_ino))) {
+
+               ceph_as_ctx_to_req(req, &as_ctx);
+
+               if (try_async && (req->r_dir_caps =
+                                 try_prep_async_create(dir, dentry, &lo,
+                                                       &req->r_deleg_ino))) {
+                       struct ceph_vino vino = { .ino = req->r_deleg_ino,
+                                                 .snap = CEPH_NOSNAP };
                        struct ceph_dentry_info *di = ceph_dentry(dentry);
 
                        set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
                        req->r_args.open.flags |= cpu_to_le32(CEPH_O_EXCL);
                        req->r_callback = ceph_async_create_cb;
 
+                       /* Hash inode before RPC */
+                       new_inode = ceph_get_inode(dir->i_sb, vino, new_inode);
+                       if (IS_ERR(new_inode)) {
+                               err = PTR_ERR(new_inode);
+                               new_inode = NULL;
+                               goto out_req;
+                       }
+                       WARN_ON_ONCE(!(new_inode->i_state & I_NEW));
+
                        spin_lock(&dentry->d_lock);
                        di->flags |= CEPH_DENTRY_ASYNC_CREATE;
                        spin_unlock(&dentry->d_lock);
 
                        err = ceph_mdsc_submit_request(mdsc, dir, req);
                        if (!err) {
-                               err = ceph_finish_async_create(dir, dentry,
-                                                       file, mode, req,
-                                                       &as_ctx, &lo);
+                               err = ceph_finish_async_create(dir, new_inode,
+                                                              dentry, file,
+                                                              mode, req,
+                                                              &as_ctx, &lo);
+                               new_inode = NULL;
                        } else if (err == -EJUKEBOX) {
                                restore_deleg_ino(dir, req->r_deleg_ino);
                                ceph_mdsc_put_request(req);
+                               discard_new_inode(new_inode);
+                               ceph_release_acl_sec_ctx(&as_ctx);
+                               memset(&as_ctx, 0, sizeof(as_ctx));
+                               new_inode = NULL;
                                try_async = false;
                                ceph_put_string(rcu_dereference_raw(lo.pool_ns));
                                goto retry;
@@ -830,6 +853,8 @@ retry:
        }
 
        set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
+       req->r_new_inode = new_inode;
+       new_inode = NULL;
        err = ceph_mdsc_do_request(mdsc, (flags & O_CREAT) ? dir : NULL, req);
        if (err == -ENOENT) {
                dentry = ceph_handle_snapdir(req, dentry);
@@ -858,6 +883,13 @@ retry:
                dout("atomic_open finish_no_open on dn %p\n", dn);
                err = finish_no_open(file, dn);
        } else {
+               if (IS_ENCRYPTED(dir) &&
+                   !fscrypt_has_permitted_context(dir, d_inode(dentry))) {
+                       pr_warn("Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n",
+                               ceph_vinop(dir), ceph_vinop(d_inode(dentry)));
+                       goto out_req;
+               }
+
                dout("atomic_open finish_open on dn %p\n", dn);
                if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
                        struct inode *newino = d_inode(dentry);
@@ -870,6 +902,7 @@ retry:
        }
 out_req:
        ceph_mdsc_put_request(req);
+       iput(new_inode);
 out_ctx:
        ceph_release_acl_sec_ctx(&as_ctx);
        dout("atomic_open result=%d\n", err);
@@ -924,21 +957,24 @@ enum {
  * If we get a short result from the OSD, check against i_size; we need to
  * only return a short read to the caller if we hit EOF.
  */
-static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
-                             int *retry_op)
+ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+                        struct iov_iter *to, int *retry_op,
+                        u64 *last_objver)
 {
-       struct file *file = iocb->ki_filp;
-       struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_osd_client *osdc = &fsc->client->osdc;
        ssize_t ret;
-       u64 off = iocb->ki_pos;
+       u64 off = *ki_pos;
        u64 len = iov_iter_count(to);
        u64 i_size = i_size_read(inode);
+       bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
+       u64 objver = 0;
 
-       dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
-            (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+       dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len);
+
+       if (ceph_inode_is_shutdown(inode))
+               return -EIO;
 
        if (!len)
                return 0;
@@ -962,10 +998,21 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                bool more;
                int idx;
                size_t left;
+               struct ceph_osd_req_op *op;
+               u64 read_off = off;
+               u64 read_len = len;
+
+               /* determine new offset/length if encrypted */
+               ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
+
+               dout("sync_read orig %llu~%llu reading %llu~%llu",
+                    off, len, read_off, read_len);
 
                req = ceph_osdc_new_request(osdc, &ci->i_layout,
-                                       ci->i_vino, off, &len, 0, 1,
-                                       CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
+                                       ci->i_vino, read_off, &read_len, 0, 1,
+                                       sparse ? CEPH_OSD_OP_SPARSE_READ :
+                                                CEPH_OSD_OP_READ,
+                                       CEPH_OSD_FLAG_READ,
                                        NULL, ci->i_truncate_seq,
                                        ci->i_truncate_size, false);
                if (IS_ERR(req)) {
@@ -973,10 +1020,13 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        break;
                }
 
+               /* adjust len downward if the request truncated the len */
+               if (off + len > read_off + read_len)
+                       len = read_off + read_len - off;
                more = len < iov_iter_count(to);
 
-               num_pages = calc_pages_for(off, len);
-               page_off = off & ~PAGE_MASK;
+               num_pages = calc_pages_for(read_off, read_len);
+               page_off = offset_in_page(off);
                pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
                if (IS_ERR(pages)) {
                        ceph_osdc_put_request(req);
@@ -984,29 +1034,75 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        break;
                }
 
-               osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
+               osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
+                                                offset_in_page(read_off),
                                                 false, false);
+
+               op = &req->r_ops[0];
+               if (sparse) {
+                       ret = ceph_alloc_sparse_ext_map(op);
+                       if (ret) {
+                               ceph_osdc_put_request(req);
+                               break;
+                       }
+               }
+
                ceph_osdc_start_request(osdc, req);
                ret = ceph_osdc_wait_request(osdc, req);
 
                ceph_update_read_metrics(&fsc->mdsc->metric,
                                         req->r_start_latency,
                                         req->r_end_latency,
-                                        len, ret);
+                                        read_len, ret);
 
-               ceph_osdc_put_request(req);
+               if (ret > 0)
+                       objver = req->r_version;
 
                i_size = i_size_read(inode);
                dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
                     off, len, ret, i_size, (more ? " MORE" : ""));
 
-               if (ret == -ENOENT)
+               /* Fix it to go to end of extent map */
+               if (sparse && ret >= 0)
+                       ret = ceph_sparse_ext_map_end(op);
+               else if (ret == -ENOENT)
                        ret = 0;
+
+               if (ret > 0 && IS_ENCRYPTED(inode)) {
+                       int fret;
+
+                       fret = ceph_fscrypt_decrypt_extents(inode, pages,
+                                       read_off, op->extent.sparse_ext,
+                                       op->extent.sparse_ext_cnt);
+                       if (fret < 0) {
+                               ret = fret;
+                               ceph_osdc_put_request(req);
+                               break;
+                       }
+
+                       /* account for any partial block at the beginning */
+                       fret -= (off - read_off);
+
+                       /*
+                        * Short read after big offset adjustment?
+                        * Nothing is usable, just call it a zero
+                        * len read.
+                        */
+                       fret = max(fret, 0);
+
+                       /* account for partial block at the end */
+                       ret = min_t(ssize_t, fret, len);
+               }
+
+               ceph_osdc_put_request(req);
+
+               /* Short read but not EOF? Zero out the remainder. */
                if (ret >= 0 && ret < len && (off + ret < i_size)) {
                        int zlen = min(len - ret, i_size - off - ret);
                        int zoff = page_off + ret;
+
                        dout("sync_read zero gap %llu~%llu\n",
-                             off + ret, off + ret + zlen);
+                               off + ret, off + ret + zlen);
                        ceph_zero_page_vector_range(zoff, zlen, pages);
                        ret += zlen;
                }
@@ -1014,15 +1110,16 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                idx = 0;
                left = ret > 0 ? ret : 0;
                while (left > 0) {
-                       size_t len, copied;
-                       page_off = off & ~PAGE_MASK;
-                       len = min_t(size_t, left, PAGE_SIZE - page_off);
+                       size_t plen, copied;
+
+                       plen = min_t(size_t, left, PAGE_SIZE - page_off);
                        SetPageUptodate(pages[idx]);
                        copied = copy_page_to_iter(pages[idx++],
-                                                  page_off, len, to);
+                                                  page_off, plen, to);
                        off += copied;
                        left -= copied;
-                       if (copied < len) {
+                       page_off = 0;
+                       if (copied < plen) {
                                ret = -EFAULT;
                                break;
                        }
@@ -1039,21 +1136,37 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                        break;
        }
 
-       if (off > iocb->ki_pos) {
-               if (off >= i_size) {
-                       *retry_op = CHECK_EOF;
-                       ret = i_size - iocb->ki_pos;
-                       iocb->ki_pos = i_size;
-               } else {
-                       ret = off - iocb->ki_pos;
-                       iocb->ki_pos = off;
+       if (ret > 0) {
+               if (off > *ki_pos) {
+                       if (off >= i_size) {
+                               *retry_op = CHECK_EOF;
+                               ret = i_size - *ki_pos;
+                               *ki_pos = i_size;
+                       } else {
+                               ret = off - *ki_pos;
+                               *ki_pos = off;
+                       }
                }
-       }
 
+               if (last_objver)
+                       *last_objver = objver;
+       }
        dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
        return ret;
 }
 
+/* kiocb front end for __ceph_sync_read(); passes NULL for last_objver. */
+static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
+                             int *retry_op)
+{
+       struct file *filp = iocb->ki_filp;
+
+       dout("sync_read on file %p %llx~%zx %s\n", filp, iocb->ki_pos,
+            iov_iter_count(to), (filp->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+       return __ceph_sync_read(file_inode(filp), &iocb->ki_pos, to, retry_op,
+                               NULL);
+}
+
 struct ceph_aio_request {
        struct kiocb *iocb;
        size_t total_len;
@@ -1125,8 +1238,10 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
        struct inode *inode = req->r_inode;
        struct ceph_aio_request *aio_req = req->r_priv;
        struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+       struct ceph_osd_req_op *op = &req->r_ops[0];
        struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric;
        unsigned int len = osd_data->bvec_pos.iter.bi_size;
+       bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ);
 
        BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
        BUG_ON(!osd_data->num_bvecs);
@@ -1147,6 +1262,8 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
                }
                rc = -ENOMEM;
        } else if (!aio_req->write) {
+               if (sparse && rc >= 0)
+                       rc = ceph_sparse_ext_map_end(op);
                if (rc == -ENOENT)
                        rc = 0;
                if (rc >= 0 && len > rc) {
@@ -1283,6 +1400,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        loff_t pos = iocb->ki_pos;
        bool write = iov_iter_rw(iter) == WRITE;
        bool should_dirty = !write && user_backed_iter(iter);
+       bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD);
 
        if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
                return -EROFS;
@@ -1310,6 +1428,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        while (iov_iter_count(iter) > 0) {
                u64 size = iov_iter_count(iter);
                ssize_t len;
+               struct ceph_osd_req_op *op;
+               int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ;
 
                if (write)
                        size = min_t(u64, size, fsc->mount_options->wsize);
@@ -1320,8 +1440,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                            vino, pos, &size, 0,
                                            1,
-                                           write ? CEPH_OSD_OP_WRITE :
-                                                   CEPH_OSD_OP_READ,
+                                           write ? CEPH_OSD_OP_WRITE : readop,
                                            flags, snapc,
                                            ci->i_truncate_seq,
                                            ci->i_truncate_size,
@@ -1372,6 +1491,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                }
 
                osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
+               op = &req->r_ops[0];
+               if (sparse) {
+                       ret = ceph_alloc_sparse_ext_map(op);
+                       if (ret) {
+                               ceph_osdc_put_request(req);
+                               break;
+                       }
+               }
 
                if (aio_req) {
                        aio_req->total_len += len;
@@ -1399,8 +1526,11 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 
                size = i_size_read(inode);
                if (!write) {
-                       if (ret == -ENOENT)
+                       if (sparse && ret >= 0)
+                               ret = ceph_sparse_ext_map_end(op);
+                       else if (ret == -ENOENT)
                                ret = 0;
+
                        if (ret >= 0 && ret < len && pos + ret < size) {
                                struct iov_iter i;
                                int zlen = min_t(size_t, len - ret,
@@ -1481,13 +1611,12 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-       struct ceph_vino vino;
+       struct ceph_osd_client *osdc = &fsc->client->osdc;
        struct ceph_osd_request *req;
        struct page **pages;
        u64 len;
        int num_pages;
        int written = 0;
-       int flags;
        int ret;
        bool check_caps = false;
        struct timespec64 mtime = current_time(inode);
@@ -1505,79 +1634,350 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                return ret;
 
        ceph_fscache_invalidate(inode, false);
-       ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                           pos >> PAGE_SHIFT,
-                                           (pos + count - 1) >> PAGE_SHIFT);
-       if (ret < 0)
-               dout("invalidate_inode_pages2_range returned %d\n", ret);
-
-       flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
 
        while ((len = iov_iter_count(from)) > 0) {
                size_t left;
                int n;
-
-               vino = ceph_vino(inode);
-               req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
-                                           vino, pos, &len, 0, 1,
-                                           CEPH_OSD_OP_WRITE, flags, snapc,
-                                           ci->i_truncate_seq,
-                                           ci->i_truncate_size,
-                                           false);
-               if (IS_ERR(req)) {
-                       ret = PTR_ERR(req);
-                       break;
-               }
+               u64 write_pos = pos;
+               u64 write_len = len;
+               u64 objnum, objoff;
+               u32 xlen;
+               u64 assert_ver = 0;
+               bool rmw;
+               bool first, last;
+               struct iov_iter saved_iter = *from;
+               size_t off;
+
+               ceph_fscrypt_adjust_off_and_len(inode, &write_pos, &write_len);
+
+               /* clamp the length to the end of first object */
+               ceph_calc_file_object_mapping(&ci->i_layout, write_pos,
+                                             write_len, &objnum, &objoff,
+                                             &xlen);
+               write_len = xlen;
+
+               /* adjust len downward if it goes beyond current object */
+               if (pos + len > write_pos + write_len)
+                       len = write_pos + write_len - pos;
 
                /*
-                * write from beginning of first page,
-                * regardless of io alignment
+                * If we had to adjust the length or position to align with a
+                * crypto block, then we must do a read/modify/write cycle. We
+                * use a version assertion to redrive the thing if something
+                * changes in between.
                 */
-               num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+               first = pos != write_pos;
+               last = (pos + len) != (write_pos + write_len);
+               rmw = first || last;
+
+               dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n",
+                    ci->i_vino.ino, pos, len, write_pos, write_len,
+                    rmw ? "" : "no ");
 
+               /*
+                * The data is emplaced into the page as it would be if it were
+                * in an array of pagecache pages.
+                */
+               num_pages = calc_pages_for(write_pos, write_len);
                pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
                if (IS_ERR(pages)) {
                        ret = PTR_ERR(pages);
-                       goto out;
+                       break;
+               }
+
+               /* Do we need to preload the pages? */
+               if (rmw) {
+                       u64 first_pos = write_pos;
+                       u64 last_pos = (write_pos + write_len) - CEPH_FSCRYPT_BLOCK_SIZE;
+                       u64 read_len = CEPH_FSCRYPT_BLOCK_SIZE;
+                       struct ceph_osd_req_op *op;
+
+                       /* We should only need to do this for encrypted inodes */
+                       WARN_ON_ONCE(!IS_ENCRYPTED(inode));
+
+                       /* No need to do two reads if first and last blocks are same */
+                       if (first && last_pos == first_pos)
+                               last = false;
+
+                       /*
+                        * Allocate a read request for one or two extents,
+                        * depending on how the request was aligned.
+                        */
+                       req = ceph_osdc_new_request(osdc, &ci->i_layout,
+                                       ci->i_vino, first ? first_pos : last_pos,
+                                       &read_len, 0, (first && last) ? 2 : 1,
+                                       CEPH_OSD_OP_SPARSE_READ, CEPH_OSD_FLAG_READ,
+                                       NULL, ci->i_truncate_seq,
+                                       ci->i_truncate_size, false);
+                       if (IS_ERR(req)) {
+                               ceph_release_page_vector(pages, num_pages);
+                               ret = PTR_ERR(req);
+                               break;
+                       }
+
+                       /* Something is misaligned! */
+                       if (read_len != CEPH_FSCRYPT_BLOCK_SIZE) {
+                               ceph_osdc_put_request(req);
+                               ceph_release_page_vector(pages, num_pages);
+                               ret = -EIO;
+                               break;
+                       }
+
+                       /* Add extent for first block? */
+                       op = &req->r_ops[0];
+
+                       if (first) {
+                               osd_req_op_extent_osd_data_pages(req, 0, pages,
+                                                        CEPH_FSCRYPT_BLOCK_SIZE,
+                                                        offset_in_page(first_pos),
+                                                        false, false);
+                               /* We only expect a single extent here */
+                               ret = __ceph_alloc_sparse_ext_map(op, 1);
+                               if (ret) {
+                                       ceph_osdc_put_request(req);
+                                       ceph_release_page_vector(pages, num_pages);
+                                       break;
+                               }
+                       }
+
+                       /* Add extent for last block */
+                       if (last) {
+                               /* Init the other extent if first extent has been used */
+                               if (first) {
+                                       op = &req->r_ops[1];
+                                       osd_req_op_extent_init(req, 1,
+                                                       CEPH_OSD_OP_SPARSE_READ,
+                                                       last_pos, CEPH_FSCRYPT_BLOCK_SIZE,
+                                                       ci->i_truncate_size,
+                                                       ci->i_truncate_seq);
+                               }
+
+                               ret = __ceph_alloc_sparse_ext_map(op, 1);
+                               if (ret) {
+                                       ceph_osdc_put_request(req);
+                                       ceph_release_page_vector(pages, num_pages);
+                                       break;
+                               }
+
+                               osd_req_op_extent_osd_data_pages(req, first ? 1 : 0,
+                                                       &pages[num_pages - 1],
+                                                       CEPH_FSCRYPT_BLOCK_SIZE,
+                                                       offset_in_page(last_pos),
+                                                       false, false);
+                       }
+
+                       ceph_osdc_start_request(osdc, req);
+                       ret = ceph_osdc_wait_request(osdc, req);
+
+                       /* FIXME: length field is wrong if there are 2 extents */
+                       ceph_update_read_metrics(&fsc->mdsc->metric,
+                                                req->r_start_latency,
+                                                req->r_end_latency,
+                                                read_len, ret);
+
+                       /* Ok if object is not already present */
+                       if (ret == -ENOENT) {
+                               /*
+                                * If there is no object, then we can't assert
+                                * on its version. Set it to 0, and we'll use an
+                                * exclusive create instead.
+                                */
+                               ceph_osdc_put_request(req);
+                               ret = 0;
+
+                               /*
+                                * zero out the soon-to-be uncopied parts of the
+                                * first and last pages.
+                                */
+                               if (first)
+                                       zero_user_segment(pages[0], 0,
+                                                         offset_in_page(first_pos));
+                               if (last)
+                                       zero_user_segment(pages[num_pages - 1],
+                                                         offset_in_page(last_pos),
+                                                         PAGE_SIZE);
+                       } else {
+                               if (ret < 0) {
+                                       ceph_osdc_put_request(req);
+                                       ceph_release_page_vector(pages, num_pages);
+                                       break;
+                               }
+
+                               op = &req->r_ops[0];
+                               if (op->extent.sparse_ext_cnt == 0) {
+                                       if (first)
+                                               zero_user_segment(pages[0], 0,
+                                                                 offset_in_page(first_pos));
+                                       else
+                                               zero_user_segment(pages[num_pages - 1],
+                                                                 offset_in_page(last_pos),
+                                                                 PAGE_SIZE);
+                               } else if (op->extent.sparse_ext_cnt != 1 ||
+                                          ceph_sparse_ext_map_end(op) !=
+                                               CEPH_FSCRYPT_BLOCK_SIZE) {
+                                       ret = -EIO;
+                                       ceph_osdc_put_request(req);
+                                       ceph_release_page_vector(pages, num_pages);
+                                       break;
+                               }
+
+                               if (first && last) {
+                                       op = &req->r_ops[1];
+                                       if (op->extent.sparse_ext_cnt == 0) {
+                                               zero_user_segment(pages[num_pages - 1],
+                                                                 offset_in_page(last_pos),
+                                                                 PAGE_SIZE);
+                                       } else if (op->extent.sparse_ext_cnt != 1 ||
+                                                  ceph_sparse_ext_map_end(op) !=
+                                                       CEPH_FSCRYPT_BLOCK_SIZE) {
+                                               ret = -EIO;
+                                               ceph_osdc_put_request(req);
+                                               ceph_release_page_vector(pages, num_pages);
+                                               break;
+                                       }
+                               }
+
+                               /* Grab assert version. It must be non-zero. */
+                               assert_ver = req->r_version;
+                               WARN_ON_ONCE(ret > 0 && assert_ver == 0);
+
+                               ceph_osdc_put_request(req);
+                               if (first) {
+                                       ret = ceph_fscrypt_decrypt_block_inplace(inode,
+                                                       pages[0], CEPH_FSCRYPT_BLOCK_SIZE,
+                                                       offset_in_page(first_pos),
+                                                       first_pos >> CEPH_FSCRYPT_BLOCK_SHIFT);
+                                       if (ret < 0) {
+                                               ceph_release_page_vector(pages, num_pages);
+                                               break;
+                                       }
+                               }
+                               if (last) {
+                                       ret = ceph_fscrypt_decrypt_block_inplace(inode,
+                                                       pages[num_pages - 1],
+                                                       CEPH_FSCRYPT_BLOCK_SIZE,
+                                                       offset_in_page(last_pos),
+                                                       last_pos >> CEPH_FSCRYPT_BLOCK_SHIFT);
+                                       if (ret < 0) {
+                                               ceph_release_page_vector(pages, num_pages);
+                                               break;
+                                       }
+                               }
+                       }
                }
 
                left = len;
+               off = offset_in_page(pos);
                for (n = 0; n < num_pages; n++) {
-                       size_t plen = min_t(size_t, left, PAGE_SIZE);
-                       ret = copy_page_from_iter(pages[n], 0, plen, from);
+                       size_t plen = min_t(size_t, left, PAGE_SIZE - off);
+
+                       /* copy the data */
+                       ret = copy_page_from_iter(pages[n], off, plen, from);
                        if (ret != plen) {
                                ret = -EFAULT;
                                break;
                        }
+                       off = 0;
                        left -= ret;
                }
-
                if (ret < 0) {
+                       dout("sync_write write failed with %d\n", ret);
                        ceph_release_page_vector(pages, num_pages);
-                       goto out;
+                       break;
                }
 
-               req->r_inode = inode;
+               if (IS_ENCRYPTED(inode)) {
+                       ret = ceph_fscrypt_encrypt_pages(inode, pages,
+                                                        write_pos, write_len,
+                                                        GFP_KERNEL);
+                       if (ret < 0) {
+                               dout("encryption failed with %d\n", ret);
+                               ceph_release_page_vector(pages, num_pages);
+                               break;
+                       }
+               }
 
-               osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
-                                               false, true);
+               req = ceph_osdc_new_request(osdc, &ci->i_layout,
+                                           ci->i_vino, write_pos, &write_len,
+                                           rmw ? 1 : 0, rmw ? 2 : 1,
+                                           CEPH_OSD_OP_WRITE,
+                                           CEPH_OSD_FLAG_WRITE,
+                                           snapc, ci->i_truncate_seq,
+                                           ci->i_truncate_size, false);
+               if (IS_ERR(req)) {
+                       ret = PTR_ERR(req);
+                       ceph_release_page_vector(pages, num_pages);
+                       break;
+               }
 
+               dout("sync_write write op %lld~%llu\n", write_pos, write_len);
+               osd_req_op_extent_osd_data_pages(req, rmw ? 1 : 0, pages, write_len,
+                                                offset_in_page(write_pos), false,
+                                                true);
+               req->r_inode = inode;
                req->r_mtime = mtime;
-               ceph_osdc_start_request(&fsc->client->osdc, req);
-               ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+               /* Set up the assertion */
+               if (rmw) {
+                       /*
+                        * Set up the assertion. If we don't have a version
+                        * number, then the object doesn't exist yet. Use an
+                        * exclusive create instead of a version assertion in
+                        * that case.
+                        */
+                       if (assert_ver) {
+                               osd_req_op_init(req, 0, CEPH_OSD_OP_ASSERT_VER, 0);
+                               req->r_ops[0].assert_ver.ver = assert_ver;
+                       } else {
+                               osd_req_op_init(req, 0, CEPH_OSD_OP_CREATE,
+                                               CEPH_OSD_OP_FLAG_EXCL);
+                       }
+               }
+
+               ceph_osdc_start_request(osdc, req);
+               ret = ceph_osdc_wait_request(osdc, req);
 
                ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
                                          req->r_end_latency, len, ret);
-out:
                ceph_osdc_put_request(req);
                if (ret != 0) {
+                       dout("sync_write osd write returned %d\n", ret);
+                       /* Version changed! Must re-do the rmw cycle */
+                       if ((assert_ver && (ret == -ERANGE || ret == -EOVERFLOW)) ||
+                           (!assert_ver && ret == -EEXIST)) {
+                               /* We should only ever see this on a rmw */
+                               WARN_ON_ONCE(!rmw);
+
+                               /* The version should never go backward */
+                               WARN_ON_ONCE(ret == -EOVERFLOW);
+
+                               *from = saved_iter;
+
+                               /* FIXME: limit number of times we loop? */
+                               continue;
+                       }
                        ceph_set_error_write(ci);
                        break;
                }
 
                ceph_clear_error_write(ci);
+
+               /*
+                * We successfully wrote to a range of the file. Declare
+                * that region of the pagecache invalid.
+                */
+               ret = invalidate_inode_pages2_range(
+                               inode->i_mapping,
+                               pos >> PAGE_SHIFT,
+                               (pos + len - 1) >> PAGE_SHIFT);
+               if (ret < 0) {
+                       dout("invalidate_inode_pages2_range returned %d\n",
+                            ret);
+                       ret = 0;
+               }
                pos += len;
                written += len;
+               dout("sync_write written %d\n", written);
                if (pos > i_size_read(inode)) {
                        check_caps = ceph_inode_set_size(inode, pos);
                        if (check_caps)
@@ -1591,6 +1991,7 @@ out:
                ret = written;
                iocb->ki_pos = pos;
        }
+       dout("sync_write returning %d\n", ret);
        return ret;
 }
 
@@ -1648,7 +2049,9 @@ again:
                     ceph_cap_string(got));
 
                if (!ceph_has_inline_data(ci)) {
-                       if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+                       if (!retry_op &&
+                           (iocb->ki_flags & IOCB_DIRECT) &&
+                           !IS_ENCRYPTED(inode)) {
                                ret = ceph_direct_read_write(iocb, to,
                                                             NULL, NULL);
                                if (ret >= 0 && ret < len)
@@ -1934,7 +2337,7 @@ retry_snap:
 
                /* we might need to revert back to that point */
                data = *from;
-               if (iocb->ki_flags & IOCB_DIRECT)
+               if ((iocb->ki_flags & IOCB_DIRECT) && !IS_ENCRYPTED(inode))
                        written = ceph_direct_read_write(iocb, &data, snapc,
                                                         &prealloc_cf);
                else
@@ -2165,6 +2568,9 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
 
+       if (IS_ENCRYPTED(inode))
+               return -EOPNOTSUPP;
+
        prealloc_cf = ceph_alloc_cap_flush();
        if (!prealloc_cf)
                return -ENOMEM;
@@ -2486,6 +2892,10 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
                return -EOPNOTSUPP;
        }
 
+       /* Every encrypted inode gets its own key, so we can't offload them */
+       if (IS_ENCRYPTED(src_inode) || IS_ENCRYPTED(dst_inode))
+               return -EOPNOTSUPP;
+
        if (len < src_ci->i_layout.object_size)
                return -EOPNOTSUPP; /* no remote copy will be done */
 
index fd05d68..800ab79 100644 (file)
 #include <linux/random.h>
 #include <linux/sort.h>
 #include <linux/iversion.h>
+#include <linux/fscrypt.h>
 
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "crypto.h"
 #include <linux/ceph/decode.h>
 
 /*
@@ -33,6 +35,7 @@
  */
 
 static const struct inode_operations ceph_symlink_iops;
+static const struct inode_operations ceph_encrypted_symlink_iops;
 
 static void ceph_inode_work(struct work_struct *work);
 
@@ -52,17 +55,99 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
        return 0;
 }
 
-struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
+/**
+ * ceph_new_inode - allocate a new inode in advance of an expected create
+ * @dir: parent directory for new inode
+ * @dentry: dentry that may eventually point to new inode
+ * @mode: mode of new inode
+ * @as_ctx: pointer to inherited security context
+ *
+ * Grab a fresh inode from @dir's superblock and prepare @as_ctx for it:
+ * ACLs (skipped for symlinks), the security context and, unless @dir is a
+ * snapdir, the fscrypt context. If any step fails, the reference on the
+ * new inode is dropped again.
+ *
+ * Returns a pointer to the new inode or an ERR_PTR.
+ */
+struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
+                            umode_t *mode, struct ceph_acl_sec_ctx *as_ctx)
+{
+       struct inode *in = new_inode(dir->i_sb);
+       int ret = 0;
+
+       if (!in)
+               return ERR_PTR(-ENOMEM);
+
+       /* ACL pre-initialisation is skipped for symlinks */
+       if (!S_ISLNK(*mode))
+               ret = ceph_pre_init_acls(dir, mode, as_ctx);
+       if (ret < 0)
+               goto out_put;
+
+       in->i_state = 0;
+       in->i_mode = *mode;
+
+       ret = ceph_security_init_secctx(dentry, *mode, as_ctx);
+       if (ret < 0)
+               goto out_put;
+
+       /*
+        * We'll skip setting fscrypt context for snapshots, leaving that for
+        * the handle_reply().
+        */
+       if (ceph_snap(dir) == CEPH_SNAPDIR)
+               return in;
+
+       ret = ceph_fscrypt_prepare_context(dir, in, as_ctx);
+       if (!ret)
+               return in;
+
+out_put:
+       iput(in);
+       return ERR_PTR(ret);
+}
+
+/*
+ * Hand the pagelist held in @as_ctx (if any) over to @req, clearing it in
+ * @as_ctx, then pass both on to ceph_fscrypt_as_ctx_to_req().
+ */
+void ceph_as_ctx_to_req(struct ceph_mds_request *req,
+                       struct ceph_acl_sec_ctx *as_ctx)
+{
+       struct ceph_pagelist *pl = as_ctx->pagelist;
+
+       if (pl) {
+               as_ctx->pagelist = NULL;
+               req->r_pagelist = pl;
+       }
+       ceph_fscrypt_as_ctx_to_req(req, as_ctx);
+}
+
+/**
+ * ceph_get_inode - find or create/hash a new inode
+ * @sb: superblock to search and allocate in
+ * @vino: vino to search for
+ * @newino: optional new inode to insert if one isn't found (may be NULL)
+ *
+ * Search for or insert a new inode into the hash for the given vino, and
+ * return a reference to it. If @newino is non-NULL, its reference is consumed.
+ */
+struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino,
+                            struct inode *newino)
 {
        struct inode *inode;
 
        if (ceph_vino_is_reserved(vino))
                return ERR_PTR(-EREMOTEIO);
 
-       inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare,
-                            ceph_set_ino_cb, &vino);
-       if (!inode)
+       if (newino) {
+               inode = inode_insert5(newino, (unsigned long)vino.ino,
+                                     ceph_ino_compare, ceph_set_ino_cb, &vino);
+               if (inode != newino)
+                       iput(newino);
+       } else {
+               inode = iget5_locked(sb, (unsigned long)vino.ino,
+                                    ceph_ino_compare, ceph_set_ino_cb, &vino);
+       }
+
+       if (!inode) {
+               dout("No inode found for %llx.%llx\n", vino.ino, vino.snap);
                return ERR_PTR(-ENOMEM);
+       }
 
        dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode),
             ceph_vinop(inode), inode, !!(inode->i_state & I_NEW));
@@ -78,8 +163,9 @@ struct inode *ceph_get_snapdir(struct inode *parent)
                .ino = ceph_ino(parent),
                .snap = CEPH_SNAPDIR,
        };
-       struct inode *inode = ceph_get_inode(parent->i_sb, vino);
+       struct inode *inode = ceph_get_inode(parent->i_sb, vino, NULL);
        struct ceph_inode_info *ci = ceph_inode(inode);
+       int ret = -ENOTDIR;
 
        if (IS_ERR(inode))
                return inode;
@@ -105,6 +191,24 @@ struct inode *ceph_get_snapdir(struct inode *parent)
        ci->i_rbytes = 0;
        ci->i_btime = ceph_inode(parent)->i_btime;
 
+#ifdef CONFIG_FS_ENCRYPTION
+       /* if encrypted, just borrow fscrypt_auth from parent */
+       if (IS_ENCRYPTED(parent)) {
+               struct ceph_inode_info *pci = ceph_inode(parent);
+
+               ci->fscrypt_auth = kmemdup(pci->fscrypt_auth,
+                                          pci->fscrypt_auth_len,
+                                          GFP_KERNEL);
+               if (ci->fscrypt_auth) {
+                       inode->i_flags |= S_ENCRYPTED;
+                       ci->fscrypt_auth_len = pci->fscrypt_auth_len;
+               } else {
+                       dout("Failed to alloc snapdir fscrypt_auth\n");
+                       ret = -ENOMEM;
+                       goto err;
+               }
+       }
+#endif
        if (inode->i_state & I_NEW) {
                inode->i_op = &ceph_snapdir_iops;
                inode->i_fop = &ceph_snapdir_fops;
@@ -118,7 +222,7 @@ err:
                discard_new_inode(inode);
        else
                iput(inode);
-       return ERR_PTR(-ENOTDIR);
+       return ERR_PTR(ret);
 }
 
 const struct inode_operations ceph_file_iops = {
@@ -517,6 +621,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_truncate_seq = 0;
        ci->i_truncate_size = 0;
        ci->i_truncate_pending = 0;
+       ci->i_truncate_pagecache_size = 0;
 
        ci->i_max_size = 0;
        ci->i_reported_size = 0;
@@ -547,6 +652,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        INIT_WORK(&ci->i_work, ceph_inode_work);
        ci->i_work_mask = 0;
        memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
+#ifdef CONFIG_FS_ENCRYPTION
+       ci->fscrypt_auth = NULL;
+       ci->fscrypt_auth_len = 0;
+#endif
        return &ci->netfs.inode;
 }
 
@@ -555,6 +664,10 @@ void ceph_free_inode(struct inode *inode)
        struct ceph_inode_info *ci = ceph_inode(inode);
 
        kfree(ci->i_symlink);
+#ifdef CONFIG_FS_ENCRYPTION
+       kfree(ci->fscrypt_auth);
+#endif
+       fscrypt_free_inode(inode);
        kmem_cache_free(ceph_inode_cachep, ci);
 }
 
@@ -575,6 +688,7 @@ void ceph_evict_inode(struct inode *inode)
        clear_inode(inode);
 
        ceph_fscache_unregister_inode_cookie(ci);
+       fscrypt_put_encryption_info(inode);
 
        __ceph_remove_caps(ci);
 
@@ -650,7 +764,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
                        ceph_fscache_update(inode);
                ci->i_reported_size = size;
                if (truncate_seq != ci->i_truncate_seq) {
-                       dout("truncate_seq %u -> %u\n",
+                       dout("%s truncate_seq %u -> %u\n", __func__,
                             ci->i_truncate_seq, truncate_seq);
                        ci->i_truncate_seq = truncate_seq;
 
@@ -674,11 +788,26 @@ int ceph_fill_file_size(struct inode *inode, int issued,
                        }
                }
        }
-       if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0 &&
-           ci->i_truncate_size != truncate_size) {
-               dout("truncate_size %lld -> %llu\n", ci->i_truncate_size,
-                    truncate_size);
+
+       /*
+        * It's possible that the new sizes of the two consecutive
+        * size truncations will be in the same fscrypt last block,
+        * and we need to truncate the corresponding page caches
+        * anyway.
+        */
+       if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0) {
+               dout("%s truncate_size %lld -> %llu, encrypted %d\n", __func__,
+                    ci->i_truncate_size, truncate_size, !!IS_ENCRYPTED(inode));
+
                ci->i_truncate_size = truncate_size;
+
+               if (IS_ENCRYPTED(inode)) {
+                       dout("%s truncate_pagecache_size %lld -> %llu\n",
+                            __func__, ci->i_truncate_pagecache_size, size);
+                       ci->i_truncate_pagecache_size = size;
+               } else {
+                       ci->i_truncate_pagecache_size = truncate_size;
+               }
        }
        return queue_trunc;
 }
@@ -752,6 +881,34 @@ void ceph_fill_file_time(struct inode *inode, int issued,
                     inode, time_warp_seq, ci->i_time_warp_seq);
 }
 
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+/*
+ * Base64-decode an encrypted symlink target.
+ *
+ * @encsym: base64 text of the symlink target (need not be NUL-terminated)
+ * @enclen: number of bytes in @encsym
+ * @decsym: on success, set to a newly allocated buffer holding the decoded
+ *          bytes followed by a NUL; the caller must kfree() it
+ *
+ * Returns the decoded length on success, -ENOMEM if the buffer cannot be
+ * allocated, or -EIO if @encsym is not valid base64.
+ */
+static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym)
+{
+       int declen;
+       u8 *sym;
+
+       /*
+        * Base64 output is never longer than its input, so enclen bytes hold
+        * the decoded data and the extra byte holds the terminator.
+        */
+       sym = kmalloc(enclen + 1, GFP_NOFS);
+       if (!sym)
+               return -ENOMEM;
+
+       declen = ceph_base64_decode(encsym, enclen, sym);
+       if (declen < 0) {
+               pr_err("%s: can't decode symlink (%d). Content: %.*s\n",
+                      __func__, declen, enclen, encsym);
+               kfree(sym);
+               return -EIO;
+       }
+       /*
+        * Terminate directly after the decoded bytes. Writing at declen + 1
+        * would leave sym[declen] uninitialized and would run past the
+        * allocation when enclen is 0.
+        */
+       sym[declen] = '\0';
+       *decsym = sym;
+       return declen;
+}
+#else
+/* Without fscrypt support encrypted symlinks cannot be decoded. */
+static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 /*
  * Populate an inode based on info from mds.  May be called on new or
  * existing inodes.
@@ -857,15 +1014,20 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
        issued |= __ceph_caps_dirty(ci);
        new_issued = ~issued & info_caps;
 
-       /* directories have fl_stripe_unit set to zero */
-       if (le32_to_cpu(info->layout.fl_stripe_unit))
-               inode->i_blkbits =
-                       fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
-       else
-               inode->i_blkbits = CEPH_BLOCK_SHIFT;
-
        __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
 
+#ifdef CONFIG_FS_ENCRYPTION
+       if (iinfo->fscrypt_auth_len &&
+           ((inode->i_state & I_NEW) || (ci->fscrypt_auth_len == 0))) {
+               kfree(ci->fscrypt_auth);
+               ci->fscrypt_auth_len = iinfo->fscrypt_auth_len;
+               ci->fscrypt_auth = iinfo->fscrypt_auth;
+               iinfo->fscrypt_auth = NULL;
+               iinfo->fscrypt_auth_len = 0;
+               inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED);
+       }
+#endif
+
        if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
            (issued & CEPH_CAP_AUTH_EXCL) == 0) {
                inode->i_mode = mode;
@@ -878,6 +1040,15 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
                ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime);
        }
 
+       /* directories have fl_stripe_unit set to zero */
+       if (IS_ENCRYPTED(inode))
+               inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT;
+       else if (le32_to_cpu(info->layout.fl_stripe_unit))
+               inode->i_blkbits =
+                       fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+       else
+               inode->i_blkbits = CEPH_BLOCK_SHIFT;
+
        if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
            (issued & CEPH_CAP_LINK_EXCL) == 0)
                set_nlink(inode, le32_to_cpu(info->nlink));
@@ -899,6 +1070,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 
        if (new_version ||
            (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+               u64 size = le64_to_cpu(info->size);
                s64 old_pool = ci->i_layout.pool_id;
                struct ceph_string *old_ns;
 
@@ -912,10 +1084,22 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 
                pool_ns = old_ns;
 
+               /*
+                * For an encrypted inode with a non-zero size, fscrypt_file
+                * may carry a second 64-bit size. Use it only when the size
+                * from the inode info equals it rounded up to the fscrypt
+                * block size; otherwise warn and keep the inode info size.
+                */
+               if (IS_ENCRYPTED(inode) && size &&
+                   iinfo->fscrypt_file_len == sizeof(__le64)) {
+                       u64 fsize = __le64_to_cpu(*(__le64 *)iinfo->fscrypt_file);
+
+                       if (size == round_up(fsize, CEPH_FSCRYPT_BLOCK_SIZE)) {
+                               size = fsize;
+                       } else {
+                               /* report the CPU-endian size and the rejected fscrypt_file value */
+                               pr_warn("fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n",
+                                       size, fsize);
+                       }
+               }
+
                queue_trunc = ceph_fill_file_size(inode, issued,
                                        le32_to_cpu(info->truncate_seq),
                                        le64_to_cpu(info->truncate_size),
-                                       le64_to_cpu(info->size));
+                                       size);
                /* only update max_size on auth cap */
                if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
                    ci->i_max_size != le64_to_cpu(info->max_size)) {
@@ -975,26 +1159,42 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
                inode->i_fop = &ceph_file_fops;
                break;
        case S_IFLNK:
-               inode->i_op = &ceph_symlink_iops;
                if (!ci->i_symlink) {
                        u32 symlen = iinfo->symlink_len;
                        char *sym;
 
                        spin_unlock(&ci->i_ceph_lock);
 
-                       if (symlen != i_size_read(inode)) {
-                               pr_err("%s %llx.%llx BAD symlink "
-                                       "size %lld\n", __func__,
-                                       ceph_vinop(inode),
-                                       i_size_read(inode));
+                       if (IS_ENCRYPTED(inode)) {
+                               if (symlen != i_size_read(inode))
+                                       pr_err("%s %llx.%llx BAD symlink size %lld\n",
+                                               __func__, ceph_vinop(inode),
+                                               i_size_read(inode));
+
+                               err = decode_encrypted_symlink(iinfo->symlink,
+                                                              symlen, (u8 **)&sym);
+                               if (err < 0) {
+                                       pr_err("%s decoding encrypted symlink failed: %d\n",
+                                               __func__, err);
+                                       goto out;
+                               }
+                               symlen = err;
                                i_size_write(inode, symlen);
                                inode->i_blocks = calc_inode_blocks(symlen);
-                       }
+                       } else {
+                               if (symlen != i_size_read(inode)) {
+                                       pr_err("%s %llx.%llx BAD symlink size %lld\n",
+                                               __func__, ceph_vinop(inode),
+                                               i_size_read(inode));
+                                       i_size_write(inode, symlen);
+                                       inode->i_blocks = calc_inode_blocks(symlen);
+                               }
 
-                       err = -ENOMEM;
-                       sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
-                       if (!sym)
-                               goto out;
+                               err = -ENOMEM;
+                               sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
+                               if (!sym)
+                                       goto out;
+                       }
 
                        spin_lock(&ci->i_ceph_lock);
                        if (!ci->i_symlink)
@@ -1002,7 +1202,17 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
                        else
                                kfree(sym); /* lost a race */
                }
-               inode->i_link = ci->i_symlink;
+
+               if (IS_ENCRYPTED(inode)) {
+                       /*
+                        * Encrypted symlinks need to be decrypted before we can
+                        * cache their targets in i_link. Don't touch it here.
+                        */
+                       inode->i_op = &ceph_encrypted_symlink_iops;
+               } else {
+                       inode->i_link = ci->i_symlink;
+                       inode->i_op = &ceph_symlink_iops;
+               }
                break;
        case S_IFDIR:
                inode->i_op = &ceph_dir_iops;
@@ -1310,8 +1520,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
                if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
                    test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
                    !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
+                       bool is_nokey = false;
                        struct qstr dname;
                        struct dentry *dn, *parent;
+                       struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+                       struct ceph_fname fname = { .dir        = dir,
+                                                   .name       = rinfo->dname,
+                                                   .ctext      = rinfo->altname,
+                                                   .name_len   = rinfo->dname_len,
+                                                   .ctext_len  = rinfo->altname_len };
 
                        BUG_ON(!rinfo->head->is_target);
                        BUG_ON(req->r_dentry);
@@ -1319,8 +1536,20 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
                        parent = d_find_any_alias(dir);
                        BUG_ON(!parent);
 
-                       dname.name = rinfo->dname;
-                       dname.len = rinfo->dname_len;
+                       err = ceph_fname_alloc_buffer(dir, &oname);
+                       if (err < 0) {
+                               dput(parent);
+                               goto done;
+                       }
+
+                       err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
+                       if (err < 0) {
+                               dput(parent);
+                               ceph_fname_free_buffer(dir, &oname);
+                               goto done;
+                       }
+                       dname.name = oname.name;
+                       dname.len = oname.len;
                        dname.hash = full_name_hash(parent, dname.name, dname.len);
                        tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
                        tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
@@ -1335,9 +1564,15 @@ retry_lookup:
                                     dname.len, dname.name, dn);
                                if (!dn) {
                                        dput(parent);
+                                       ceph_fname_free_buffer(dir, &oname);
                                        err = -ENOMEM;
                                        goto done;
                                }
+                               if (is_nokey) {
+                                       spin_lock(&dn->d_lock);
+                                       dn->d_flags |= DCACHE_NOKEY_NAME;
+                                       spin_unlock(&dn->d_lock);
+                               }
                                err = 0;
                        } else if (d_really_is_positive(dn) &&
                                   (ceph_ino(d_inode(dn)) != tvino.ino ||
@@ -1349,6 +1584,7 @@ retry_lookup:
                                dput(dn);
                                goto retry_lookup;
                        }
+                       ceph_fname_free_buffer(dir, &oname);
 
                        req->r_dentry = dn;
                        dput(parent);
@@ -1552,7 +1788,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
                vino.ino = le64_to_cpu(rde->inode.in->ino);
                vino.snap = le64_to_cpu(rde->inode.in->snapid);
 
-               in = ceph_get_inode(req->r_dentry->d_sb, vino);
+               in = ceph_get_inode(req->r_dentry->d_sb, vino, NULL);
                if (IS_ERR(in)) {
                        err = PTR_ERR(in);
                        dout("new_inode badness got %d\n", err);
@@ -1630,7 +1866,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                             struct ceph_mds_session *session)
 {
        struct dentry *parent = req->r_dentry;
-       struct ceph_inode_info *ci = ceph_inode(d_inode(parent));
+       struct inode *inode = d_inode(parent);
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
        struct qstr dname;
        struct dentry *dn;
@@ -1704,9 +1941,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                tvino.snap = le64_to_cpu(rde->inode.in->snapid);
 
                if (rinfo->hash_order) {
-                       u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
-                                                rde->name, rde->name_len);
-                       hash = ceph_frag_value(hash);
+                       u32 hash = ceph_frag_value(rde->raw_hash);
                        if (hash != last_hash)
                                fpos_offset = 2;
                        last_hash = hash;
@@ -1729,6 +1964,11 @@ retry_lookup:
                                err = -ENOMEM;
                                goto out;
                        }
+                       if (rde->is_nokey) {
+                               spin_lock(&dn->d_lock);
+                               dn->d_flags |= DCACHE_NOKEY_NAME;
+                               spin_unlock(&dn->d_lock);
+                       }
                } else if (d_really_is_positive(dn) &&
                           (ceph_ino(d_inode(dn)) != tvino.ino ||
                            ceph_snap(d_inode(dn)) != tvino.snap)) {
@@ -1754,7 +1994,7 @@ retry_lookup:
                if (d_really_is_positive(dn)) {
                        in = d_inode(dn);
                } else {
-                       in = ceph_get_inode(parent->d_sb, tvino);
+                       in = ceph_get_inode(parent->d_sb, tvino, NULL);
                        if (IS_ERR(in)) {
                                dout("new_inode badness\n");
                                d_drop(dn);
@@ -1927,7 +2167,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
 retry:
        spin_lock(&ci->i_ceph_lock);
        if (ci->i_truncate_pending == 0) {
-               dout("__do_pending_vmtruncate %p none pending\n", inode);
+               dout("%s %p none pending\n", __func__, inode);
                spin_unlock(&ci->i_ceph_lock);
                mutex_unlock(&ci->i_truncate_mutex);
                return;
@@ -1939,8 +2179,7 @@ retry:
         */
        if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
                spin_unlock(&ci->i_ceph_lock);
-               dout("__do_pending_vmtruncate %p flushing snaps first\n",
-                    inode);
+               dout("%s %p flushing snaps first\n", __func__, inode);
                filemap_write_and_wait_range(&inode->i_data, 0,
                                             inode->i_sb->s_maxbytes);
                goto retry;
@@ -1949,9 +2188,9 @@ retry:
        /* there should be no reader or writer */
        WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref);
 
-       to = ci->i_truncate_size;
+       to = ci->i_truncate_pagecache_size;
        wrbuffer_refs = ci->i_wrbuffer_ref;
-       dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
+       dout("%s %p (%d) to %lld\n", __func__, inode,
             ci->i_truncate_pending, to);
        spin_unlock(&ci->i_ceph_lock);
 
@@ -1959,7 +2198,7 @@ retry:
        truncate_pagecache(inode, to);
 
        spin_lock(&ci->i_ceph_lock);
-       if (to == ci->i_truncate_size) {
+       if (to == ci->i_truncate_pagecache_size) {
                ci->i_truncate_pending = 0;
                finish = 1;
        }
@@ -2000,6 +2239,32 @@ static void ceph_inode_work(struct work_struct *work)
        iput(inode);
 }
 
+/*
+ * ->get_link for encrypted symlinks (see ceph_encrypted_symlink_iops).
+ *
+ * Returns ERR_PTR(-ECHILD) when called without a dentry; otherwise hands
+ * the stored symlink body (ci->i_symlink, i_size_read(inode) bytes) to
+ * fscrypt_get_symlink() and returns whatever that returns.
+ */
+static const char *ceph_encrypted_get_link(struct dentry *dentry,
+                                          struct inode *inode,
+                                          struct delayed_call *done)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       if (!dentry)
+               return ERR_PTR(-ECHILD);
+
+       return fscrypt_get_symlink(inode, ci->i_symlink, i_size_read(inode),
+                                  done);
+}
+
+/*
+ * ->getattr for encrypted symlinks: run the regular ceph_getattr() and,
+ * only if that succeeds, let fscrypt_symlink_getattr() post-process @stat.
+ *
+ * Returns the first non-zero result of the two calls, or 0.
+ */
+static int ceph_encrypted_symlink_getattr(struct mnt_idmap *idmap,
+                                         const struct path *path,
+                                         struct kstat *stat, u32 request_mask,
+                                         unsigned int query_flags)
+{
+       int ret;
+
+       ret = ceph_getattr(idmap, path, stat, request_mask, query_flags);
+       if (ret)
+               return ret;
+       return fscrypt_symlink_getattr(path, stat);
+}
+
 /*
  * symlinks
  */
@@ -2010,20 +2275,173 @@ static const struct inode_operations ceph_symlink_iops = {
        .listxattr = ceph_listxattr,
 };
 
-int __ceph_setattr(struct inode *inode, struct iattr *attr)
+/*
+ * encrypted symlinks: same setattr/listxattr as plain symlinks, but
+ * get_link and getattr go through the fscrypt-aware wrappers
+ */
+static const struct inode_operations ceph_encrypted_symlink_iops = {
+       .get_link = ceph_encrypted_get_link,
+       .setattr = ceph_setattr,
+       .getattr = ceph_encrypted_symlink_getattr,
+       .listxattr = ceph_listxattr,
+};
+
+/*
+ * Transfer the encrypted last block to the MDS, so that the MDS can
+ * update it when the file is truncated to a smaller size.
+ *
+ * The fscrypt block containing the new EOF (attr->ia_size) is read back,
+ * zeroed from the new EOF onwards, re-encrypted in place and attached to
+ * @req as a pagelist, preceded by a ceph_fscrypt_truncate_size_header.
+ *
+ * We don't support a PAGE_SIZE that is smaller than the
+ * CEPH_FSCRYPT_BLOCK_SIZE.
+ *
+ * Returns 0 on success, in which case req->r_pagelist points at the new
+ * pagelist; otherwise the error of the failing step is returned and the
+ * pagelist (if any) is released here.
+ */
+static int fill_fscrypt_truncate(struct inode *inode,
+                                struct ceph_mds_request *req,
+                                struct iattr *attr)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       /* offset of the new EOF inside its fscrypt block */
+       int boff = attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE;
+       /* orig_pos: file offset at which that block starts */
+       loff_t pos, orig_pos = round_down(attr->ia_size,
+                                         CEPH_FSCRYPT_BLOCK_SIZE);
+       u64 block = orig_pos >> CEPH_FSCRYPT_BLOCK_SHIFT;
+       struct ceph_pagelist *pagelist = NULL;
+       struct kvec iov = {0};
+       struct iov_iter iter;
+       struct page *page = NULL;
+       struct ceph_fscrypt_truncate_size_header header;
+       int retry_op = 0;
+       int len = CEPH_FSCRYPT_BLOCK_SIZE;
+       loff_t i_size = i_size_read(inode);
+       int got, ret, issued;
+       u64 objver;
+
+       ret = __ceph_get_caps(inode, NULL, CEPH_CAP_FILE_RD, 0, -1, &got);
+       if (ret < 0)
+               return ret;
+
+       issued = __ceph_caps_issued(ci, NULL);
+
+       dout("%s size %lld -> %lld got cap refs on %s, issued %s\n", __func__,
+            i_size, attr->ia_size, ceph_cap_string(got),
+            ceph_cap_string(issued));
+
+       /*
+        * Try to write back the dirty pagecache covering the whole last
+        * block, i.e. [orig_pos, orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1].
+        * The end of the range must be derived from the block size, not
+        * from CEPH_FSCRYPT_BLOCK_SHIFT.
+        */
+       if (issued & (CEPH_CAP_FILE_BUFFER)) {
+               loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SIZE - 1;
+
+               ret = filemap_write_and_wait_range(inode->i_mapping,
+                                                  orig_pos, lend);
+               if (ret < 0)
+                       goto out;
+       }
+
+       page = __page_cache_alloc(GFP_KERNEL);
+       if (page == NULL) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       pagelist = ceph_pagelist_alloc(GFP_KERNEL);
+       if (!pagelist) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       /* read one fscrypt block, starting at orig_pos, into the page */
+       iov.iov_base = kmap_local_page(page);
+       iov.iov_len = len;
+       iov_iter_kvec(&iter, READ, &iov, 1, len);
+
+       pos = orig_pos;
+       ret = __ceph_sync_read(inode, &pos, &iter, &retry_op, &objver);
+       if (ret < 0)
+               goto out;
+
+       /* Fill in the header first */
+       header.ver = 1;
+       header.compat = 1;
+       header.change_attr = cpu_to_le64(inode_peek_iversion_raw(inode));
+
+       /*
+        * Always set the block_size to CEPH_FSCRYPT_BLOCK_SIZE,
+        * because in MDS it may need this to do the truncate.
+        */
+       header.block_size = cpu_to_le32(CEPH_FSCRYPT_BLOCK_SIZE);
+
+       /*
+        * If we hit a hole here, data_len only accounts for the header
+        * fields: once fscrypt is enabled the file is split into blocks
+        * of CEPH_FSCRYPT_BLOCK_SIZE, so a hole is always a multiple of
+        * the block size and there is no partial block to rewrite.
+        *
+        * The check below relies on objver being 0 when the Rados
+        * object doesn't exist.
+        */
+       if (!objver) {
+               dout("%s hit hole, ppos %lld < size %lld\n", __func__,
+                    pos, i_size);
+
+               /* 8 + 8 + 4: presumably change_attr, file_offset, block_size */
+               header.data_len = cpu_to_le32(8 + 8 + 4);
+               header.file_offset = 0;
+               ret = 0;
+       } else {
+               header.data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE);
+               header.file_offset = cpu_to_le64(orig_pos);
+
+               dout("%s encrypt block boff/bsize %d/%lu\n", __func__,
+                    boff, CEPH_FSCRYPT_BLOCK_SIZE);
+
+               /* truncate and zero out the extra contents for the last block */
+               memset(iov.iov_base + boff, 0, PAGE_SIZE - boff);
+
+               /* encrypt the last block */
+               ret = ceph_fscrypt_encrypt_block_inplace(inode, page,
+                                                   CEPH_FSCRYPT_BLOCK_SIZE,
+                                                   0, block,
+                                                   GFP_KERNEL);
+               if (ret)
+                       goto out;
+       }
+
+       /* Insert the header */
+       ret = ceph_pagelist_append(pagelist, &header, sizeof(header));
+       if (ret)
+               goto out;
+
+       /*
+        * NOTE(review): header.block_size was unconditionally set to
+        * CEPH_FSCRYPT_BLOCK_SIZE above, so this branch is also taken in
+        * the hole case, although data_len then covers the header only.
+        * Confirm this matches what the MDS expects.
+        */
+       if (header.block_size) {
+               /* Append the last block contents to pagelist */
+               ret = ceph_pagelist_append(pagelist, iov.iov_base,
+                                          CEPH_FSCRYPT_BLOCK_SIZE);
+               if (ret)
+                       goto out;
+       }
+       req->r_pagelist = pagelist;
+out:
+       dout("%s %p size dropping cap refs on %s\n", __func__,
+            inode, ceph_cap_string(got));
+       ceph_put_cap_refs(ci, got);
+       /* iov.iov_base is only non-NULL once the page has been mapped */
+       if (iov.iov_base)
+               kunmap_local(iov.iov_base);
+       if (page)
+               __free_pages(page, 0);
+       if (ret && pagelist)
+               ceph_pagelist_release(pagelist);
+       return ret;
+}
+
+int __ceph_setattr(struct inode *inode, struct iattr *attr,
+                  struct ceph_iattr *cia)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        unsigned int ia_valid = attr->ia_valid;
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
        struct ceph_cap_flush *prealloc_cf;
+       loff_t isize = i_size_read(inode);
        int issued;
        int release = 0, dirtied = 0;
        int mask = 0;
        int err = 0;
        int inode_dirty_flags = 0;
        bool lock_snap_rwsem = false;
+       bool fill_fscrypt;
+       int truncate_retry = 20; /* The RMW will take around 50ms */
 
+retry:
        prealloc_cf = ceph_alloc_cap_flush();
        if (!prealloc_cf)
                return -ENOMEM;
@@ -2035,6 +2453,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                return PTR_ERR(req);
        }
 
+       fill_fscrypt = false;
        spin_lock(&ci->i_ceph_lock);
        issued = __ceph_caps_issued(ci, NULL);
 
@@ -2050,6 +2469,43 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
        }
 
        dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       if (cia && cia->fscrypt_auth) {
+               u32 len = ceph_fscrypt_auth_len(cia->fscrypt_auth);
+
+               if (len > sizeof(*cia->fscrypt_auth)) {
+                       err = -EINVAL;
+                       spin_unlock(&ci->i_ceph_lock);
+                       goto out;
+               }
+
+               dout("setattr %llx:%llx fscrypt_auth len %u to %u)\n",
+                       ceph_vinop(inode), ci->fscrypt_auth_len, len);
+
+               /* It should never be re-set once set */
+               WARN_ON_ONCE(ci->fscrypt_auth);
+
+               if (issued & CEPH_CAP_AUTH_EXCL) {
+                       dirtied |= CEPH_CAP_AUTH_EXCL;
+                       kfree(ci->fscrypt_auth);
+                       ci->fscrypt_auth = (u8 *)cia->fscrypt_auth;
+                       ci->fscrypt_auth_len = len;
+               } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+                          ci->fscrypt_auth_len != len ||
+                          memcmp(ci->fscrypt_auth, cia->fscrypt_auth, len)) {
+                       req->r_fscrypt_auth = cia->fscrypt_auth;
+                       mask |= CEPH_SETATTR_FSCRYPT_AUTH;
+                       release |= CEPH_CAP_AUTH_SHARED;
+               }
+               cia->fscrypt_auth = NULL;
+       }
+#else
+       if (cia && cia->fscrypt_auth) {
+               err = -EINVAL;
+               spin_unlock(&ci->i_ceph_lock);
+               goto out;
+       }
+#endif /* CONFIG_FS_ENCRYPTION */
 
        if (ia_valid & ATTR_UID) {
                dout("setattr %p uid %d -> %d\n", inode,
@@ -2119,10 +2575,27 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                }
        }
        if (ia_valid & ATTR_SIZE) {
-               loff_t isize = i_size_read(inode);
-
                dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
-               if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
+               /*
+                * The RMW is needed only when the new size is smaller
+                * and not aligned to CEPH_FSCRYPT_BLOCK_SIZE.
+                */
+               if (IS_ENCRYPTED(inode) && attr->ia_size < isize &&
+                   (attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE)) {
+                       mask |= CEPH_SETATTR_SIZE;
+                       release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
+                                  CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+                       set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+                       mask |= CEPH_SETATTR_FSCRYPT_FILE;
+                       req->r_args.setattr.size =
+                               cpu_to_le64(round_up(attr->ia_size,
+                                                    CEPH_FSCRYPT_BLOCK_SIZE));
+                       req->r_args.setattr.old_size =
+                               cpu_to_le64(round_up(isize,
+                                                    CEPH_FSCRYPT_BLOCK_SIZE));
+                       req->r_fscrypt_file = attr->ia_size;
+                       fill_fscrypt = true;
+               } else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
                        if (attr->ia_size > isize) {
                                i_size_write(inode, attr->ia_size);
                                inode->i_blocks = calc_inode_blocks(attr->ia_size);
@@ -2132,11 +2605,24 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                        }
                } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
                           attr->ia_size != isize) {
-                       req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
-                       req->r_args.setattr.old_size = cpu_to_le64(isize);
                        mask |= CEPH_SETATTR_SIZE;
                        release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
                                   CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+                       if (IS_ENCRYPTED(inode) && attr->ia_size) {
+                               set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+                               mask |= CEPH_SETATTR_FSCRYPT_FILE;
+                               req->r_args.setattr.size =
+                                       cpu_to_le64(round_up(attr->ia_size,
+                                                            CEPH_FSCRYPT_BLOCK_SIZE));
+                               req->r_args.setattr.old_size =
+                                       cpu_to_le64(round_up(isize,
+                                                            CEPH_FSCRYPT_BLOCK_SIZE));
+                               req->r_fscrypt_file = attr->ia_size;
+                       } else {
+                               req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
+                               req->r_args.setattr.old_size = cpu_to_le64(isize);
+                               req->r_fscrypt_file = 0;
+                       }
                }
        }
        if (ia_valid & ATTR_MTIME) {
@@ -2199,8 +2685,10 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 
        release &= issued;
        spin_unlock(&ci->i_ceph_lock);
-       if (lock_snap_rwsem)
+       if (lock_snap_rwsem) {
                up_read(&mdsc->snap_rwsem);
+               lock_snap_rwsem = false;
+       }
 
        if (inode_dirty_flags)
                __mark_inode_dirty(inode, inode_dirty_flags);
@@ -2212,8 +2700,29 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                req->r_args.setattr.mask = cpu_to_le32(mask);
                req->r_num_caps = 1;
                req->r_stamp = attr->ia_ctime;
+               if (fill_fscrypt) {
+                       err = fill_fscrypt_truncate(inode, req, attr);
+                       if (err)
+                               goto out;
+               }
+
+               /*
+                * The truncate request will return -EAGAIN when the
+                * last block has been updated just before the MDS
+                * successfully gets the xlock for the FILE lock. To
+                * avoid corrupting the file contents we need to retry
+                * it.
+                */
                err = ceph_mdsc_do_request(mdsc, NULL, req);
+               if (err == -EAGAIN && truncate_retry--) {
+                       dout("setattr %p result=%d (%s locally, %d remote), retry it!\n",
+                            inode, err, ceph_cap_string(dirtied), mask);
+                       ceph_mdsc_put_request(req);
+                       ceph_free_cap_flush(prealloc_cf);
+                       goto retry;
+               }
        }
+out:
        dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
             ceph_cap_string(dirtied), mask);
 
@@ -2242,6 +2751,10 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
        if (ceph_inode_is_shutdown(inode))
                return -ESTALE;
 
+       err = fscrypt_prepare_setattr(dentry, attr);
+       if (err)
+               return err;
+
        err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
        if (err != 0)
                return err;
@@ -2254,7 +2767,7 @@ int ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
            ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
                return -EDQUOT;
 
-       err = __ceph_setattr(inode, attr);
+       err = __ceph_setattr(inode, attr, NULL);
 
        if (err >= 0 && (attr->ia_valid & ATTR_MODE))
                err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode);
@@ -2525,8 +3038,12 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path,
                        stat->nlink = 1 + 1 + ci->i_subdirs;
        }
 
-       stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
        stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
+       if (IS_ENCRYPTED(inode))
+               stat->attributes |= STATX_ATTR_ENCRYPTED;
+       stat->attributes_mask |= (STATX_ATTR_CHANGE_MONOTONIC |
+                                 STATX_ATTR_ENCRYPTED);
+
        stat->result_mask = request_mask & valid_mask;
        return err;
 }
index deac817..91a8491 100644 (file)
@@ -6,6 +6,7 @@
 #include "mds_client.h"
 #include "ioctl.h"
 #include <linux/ceph/striper.h>
+#include <linux/fscrypt.h>
 
 /*
  * ioctls
@@ -268,9 +269,96 @@ static long ceph_ioctl_syncio(struct file *file)
        return 0;
 }
 
+/*
+ * Check whether the MDS can be used with fscrypt.
+ *
+ * Under mdsc->mutex, looks at the first non-NULL entry of mdsc->sessions
+ * and returns 0 if it advertises CEPHFS_FEATURE_ALTERNATE_NAME.  Returns
+ * -EOPNOTSUPP if it does not, or if there is no session at all.
+ */
+static int vet_mds_for_fscrypt(struct file *file)
+{
+       int i, ret = -EOPNOTSUPP;
+       struct ceph_mds_client  *mdsc = ceph_sb_to_mdsc(file_inode(file)->i_sb);
+
+       mutex_lock(&mdsc->mutex);
+       for (i = 0; i < mdsc->max_sessions; i++) {
+               struct ceph_mds_session *s = mdsc->sessions[i];
+
+               if (!s)
+                       continue;
+               if (test_bit(CEPHFS_FEATURE_ALTERNATE_NAME, &s->s_features))
+                       ret = 0;
+               /* only the first session found is examined */
+               break;
+       }
+       mutex_unlock(&mdsc->mutex);
+       return ret;
+}
+
+/*
+ * FS_IOC_SET_ENCRYPTION_POLICY handler.
+ *
+ * Rejects inodes whose layout has stripe_count > 1 (-EINVAL) and fails
+ * with the vet_mds_for_fscrypt() error if that check does not pass.
+ * Otherwise calls fscrypt_ioctl_set_policy() while holding a
+ * CEPH_CAP_FILE_SHARED cap reference and returns its result.
+ */
+static long ceph_set_encryption_policy(struct file *file, unsigned long arg)
+{
+       int ret, got = 0;
+       struct inode *inode = file_inode(file);
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       /* encrypted directories can't have striped layout */
+       if (ci->i_layout.stripe_count > 1)
+               return -EINVAL;
+
+       ret = vet_mds_for_fscrypt(file);
+       if (ret)
+               return ret;
+
+       /*
+        * Ensure we hold these caps so that we _know_ that the rstats check
+        * in the empty_dir check is reliable.
+        */
+       ret = ceph_get_caps(file, CEPH_CAP_FILE_SHARED, 0, -1, &got);
+       if (ret)
+               return ret;
+
+       ret = fscrypt_ioctl_set_policy(file, (const void __user *)arg);
+       /* drop whatever cap references ceph_get_caps() handed back */
+       if (got)
+               ceph_put_cap_refs(ci, got);
+
+       return ret;
+}
+
+/*
+ * Map an ioctl command number to a short human readable name for the
+ * debug output in ceph_ioctl().  Commands not listed here yield "unknown".
+ */
+static const char *ceph_ioctl_cmd_name(const unsigned int cmd)
+{
+       switch (cmd) {
+       case CEPH_IOC_GET_LAYOUT:
+               return "get_layout";
+       case CEPH_IOC_SET_LAYOUT:
+               return "set_layout";
+       case CEPH_IOC_SET_LAYOUT_POLICY:
+               return "set_layout_policy";
+       case CEPH_IOC_GET_DATALOC:
+               return "get_dataloc";
+       case CEPH_IOC_LAZYIO:
+               return "lazyio";
+       case CEPH_IOC_SYNCIO:
+               return "syncio";
+       case FS_IOC_SET_ENCRYPTION_POLICY:
+               return "set_encryption_policy";
+       case FS_IOC_GET_ENCRYPTION_POLICY:
+               return "get_encryption_policy";
+       case FS_IOC_GET_ENCRYPTION_POLICY_EX:
+               return "get_encryption_policy_ex";
+       case FS_IOC_ADD_ENCRYPTION_KEY:
+               return "add_encryption_key";
+       case FS_IOC_REMOVE_ENCRYPTION_KEY:
+               return "remove_encryption_key";
+       case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
+               return "remove_encryption_key_all_users";
+       case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
+               return "get_encryption_key_status";
+       case FS_IOC_GET_ENCRYPTION_NONCE:
+               return "get_encryption_nonce";
+       default:
+               return "unknown";
+       }
+}
+
 long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-       dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
+       int ret;
+
+       dout("ioctl file %p cmd %s arg %lu\n", file,
+            ceph_ioctl_cmd_name(cmd), arg);
        switch (cmd) {
        case CEPH_IOC_GET_LAYOUT:
                return ceph_ioctl_get_layout(file, (void __user *)arg);
@@ -289,6 +377,43 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
        case CEPH_IOC_SYNCIO:
                return ceph_ioctl_syncio(file);
+
+       case FS_IOC_SET_ENCRYPTION_POLICY:
+               return ceph_set_encryption_policy(file, arg);
+
+       case FS_IOC_GET_ENCRYPTION_POLICY:
+               ret = vet_mds_for_fscrypt(file);
+               if (ret)
+                       return ret;
+               return fscrypt_ioctl_get_policy(file, (void __user *)arg);
+
+       case FS_IOC_GET_ENCRYPTION_POLICY_EX:
+               ret = vet_mds_for_fscrypt(file);
+               if (ret)
+                       return ret;
+               return fscrypt_ioctl_get_policy_ex(file, (void __user *)arg);
+
+       case FS_IOC_ADD_ENCRYPTION_KEY:
+               ret = vet_mds_for_fscrypt(file);
+               if (ret)
+                       return ret;
+               return fscrypt_ioctl_add_key(file, (void __user *)arg);
+
+       case FS_IOC_REMOVE_ENCRYPTION_KEY:
+               return fscrypt_ioctl_remove_key(file, (void __user *)arg);
+
+       case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
+               return fscrypt_ioctl_remove_key_all_users(file,
+                                                         (void __user *)arg);
+
+       case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
+               return fscrypt_ioctl_get_key_status(file, (void __user *)arg);
+
+       case FS_IOC_GET_ENCRYPTION_NONCE:
+               ret = vet_mds_for_fscrypt(file);
+               if (ret)
+                       return ret;
+               return fscrypt_ioctl_get_nonce(file, (void __user *)arg);
        }
 
        return -ENOTTY;
index 5fb367b..615db14 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "crypto.h"
 
 #include <linux/ceph/ceph_features.h>
 #include <linux/ceph/messenger.h>
@@ -184,8 +185,54 @@ static int parse_reply_info_in(void **p, void *end,
                        info->rsnaps = 0;
                }
 
+               if (struct_v >= 5) {
+                       u32 alen;
+
+                       ceph_decode_32_safe(p, end, alen, bad);
+
+                       while (alen--) {
+                               u32 len;
+
+                               /* key */
+                               ceph_decode_32_safe(p, end, len, bad);
+                               ceph_decode_skip_n(p, end, len, bad);
+                               /* value */
+                               ceph_decode_32_safe(p, end, len, bad);
+                               ceph_decode_skip_n(p, end, len, bad);
+                       }
+               }
+
+               /* fscrypt flag -- ignore */
+               if (struct_v >= 6)
+                       ceph_decode_skip_8(p, end, bad);
+
+               info->fscrypt_auth = NULL;
+               info->fscrypt_auth_len = 0;
+               info->fscrypt_file = NULL;
+               info->fscrypt_file_len = 0;
+               if (struct_v >= 7) {
+                       ceph_decode_32_safe(p, end, info->fscrypt_auth_len, bad);
+                       if (info->fscrypt_auth_len) {
+                               info->fscrypt_auth = kmalloc(info->fscrypt_auth_len,
+                                                            GFP_KERNEL);
+                               if (!info->fscrypt_auth)
+                                       return -ENOMEM;
+                               ceph_decode_copy_safe(p, end, info->fscrypt_auth,
+                                                     info->fscrypt_auth_len, bad);
+                       }
+                       ceph_decode_32_safe(p, end, info->fscrypt_file_len, bad);
+                       if (info->fscrypt_file_len) {
+                               info->fscrypt_file = kmalloc(info->fscrypt_file_len,
+                                                            GFP_KERNEL);
+                               if (!info->fscrypt_file)
+                                       return -ENOMEM;
+                               ceph_decode_copy_safe(p, end, info->fscrypt_file,
+                                                     info->fscrypt_file_len, bad);
+                       }
+               }
                *p = end;
        } else {
+               /* legacy (unversioned) struct */
                if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
                        ceph_decode_64_safe(p, end, info->inline_version, bad);
                        ceph_decode_32_safe(p, end, info->inline_len, bad);
@@ -263,27 +310,47 @@ bad:
 
 static int parse_reply_info_lease(void **p, void *end,
                                  struct ceph_mds_reply_lease **lease,
-                                 u64 features)
+                                 u64 features, u32 *altname_len, u8 **altname)
 {
+       u8 struct_v;
+       u32 struct_len;
+       void *lend;
+
        if (features == (u64)-1) {
-               u8 struct_v, struct_compat;
-               u32 struct_len;
+               u8 struct_compat;
+
                ceph_decode_8_safe(p, end, struct_v, bad);
                ceph_decode_8_safe(p, end, struct_compat, bad);
+
                /* struct_v is expected to be >= 1. we only understand
                 * encoding whose struct_compat == 1. */
                if (!struct_v || struct_compat != 1)
                        goto bad;
+
                ceph_decode_32_safe(p, end, struct_len, bad);
-               ceph_decode_need(p, end, struct_len, bad);
-               end = *p + struct_len;
+       } else {
+               struct_len = sizeof(**lease);
+               *altname_len = 0;
+               *altname = NULL;
        }
 
-       ceph_decode_need(p, end, sizeof(**lease), bad);
+       lend = *p + struct_len;
+       ceph_decode_need(p, end, struct_len, bad);
        *lease = *p;
        *p += sizeof(**lease);
-       if (features == (u64)-1)
-               *p = end;
+
+       if (features == (u64)-1) {
+               if (struct_v >= 2) {
+                       ceph_decode_32_safe(p, end, *altname_len, bad);
+                       ceph_decode_need(p, end, *altname_len, bad);
+                       *altname = *p;
+                       *p += *altname_len;
+               } else {
+                       *altname = NULL;
+                       *altname_len = 0;
+               }
+       }
+       *p = lend;
        return 0;
 bad:
        return -EIO;
@@ -313,7 +380,8 @@ static int parse_reply_info_trace(void **p, void *end,
                info->dname = *p;
                *p += info->dname_len;
 
-               err = parse_reply_info_lease(p, end, &info->dlease, features);
+               err = parse_reply_info_lease(p, end, &info->dlease, features,
+                                            &info->altname_len, &info->altname);
                if (err < 0)
                        goto out_bad;
        }
@@ -339,9 +407,10 @@ out_bad:
  * parse readdir results
  */
 static int parse_reply_info_readdir(void **p, void *end,
-                               struct ceph_mds_reply_info_parsed *info,
-                               u64 features)
+                                   struct ceph_mds_request *req,
+                                   u64 features)
 {
+       struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
        u32 num, i = 0;
        int err;
 
@@ -371,18 +440,87 @@ static int parse_reply_info_readdir(void **p, void *end,
 
        info->dir_nr = num;
        while (num) {
+               struct inode *inode = d_inode(req->r_dentry);
+               struct ceph_inode_info *ci = ceph_inode(inode);
                struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
+               struct fscrypt_str tname = FSTR_INIT(NULL, 0);
+               struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+               struct ceph_fname fname;
+               u32 altname_len, _name_len;
+               u8 *altname, *_name;
+
                /* dentry */
-               ceph_decode_32_safe(p, end, rde->name_len, bad);
-               ceph_decode_need(p, end, rde->name_len, bad);
-               rde->name = *p;
-               *p += rde->name_len;
-               dout("parsed dir dname '%.*s'\n", rde->name_len, rde->name);
+               ceph_decode_32_safe(p, end, _name_len, bad);
+               ceph_decode_need(p, end, _name_len, bad);
+               _name = *p;
+               *p += _name_len;
+               dout("parsed dir dname '%.*s'\n", _name_len, _name);
+
+               if (info->hash_order)
+                       rde->raw_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
+                                                     _name, _name_len);
 
                /* dentry lease */
-               err = parse_reply_info_lease(p, end, &rde->lease, features);
+               err = parse_reply_info_lease(p, end, &rde->lease, features,
+                                            &altname_len, &altname);
                if (err)
                        goto out_bad;
+
+               /*
+                * Try to decrypt the dentry names and update them
+                * in the ceph_mds_reply_dir_entry struct.
+                */
+               fname.dir = inode;
+               fname.name = _name;
+               fname.name_len = _name_len;
+               fname.ctext = altname;
+               fname.ctext_len = altname_len;
+               /*
+                * The _name_len may be larger than altname_len, e.g. when
+                * the human readable name length is in the range
+                * (CEPH_NOHASH_NAME_MAX, CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE);
+                * the copy in ceph_fname_to_usr would then corrupt the
+                * data if there is no encryption key.
+                *
+                * Just set the no_copy flag, so that if there is no
+                * encryption key the oname.name will always be assigned
+                * to _name.
+                */
+               fname.no_copy = true;
+               if (altname_len == 0) {
+                       /*
+                        * Set tname to _name, and this will be used
+                        * to do the base64_decode in-place. It's
+                        * safe because the decoded string should
+                        * always be shorter: 3/4 of the original
+                        * string.
+                        */
+                       tname.name = _name;
+
+                       /*
+                        * Set oname to _name too, and this will be
+                        * used to do the decryption in-place.
+                        */
+                       oname.name = _name;
+                       oname.len = _name_len;
+               } else {
+                       /*
+                        * This will only do the decryption, in-place,
+                        * directly from the altname ciphertext.
+                        */
+                       oname.name = altname;
+                       oname.len = altname_len;
+               }
+               rde->is_nokey = false;
+               err = ceph_fname_to_usr(&fname, &tname, &oname, &rde->is_nokey);
+               if (err) {
+                       pr_err("%s unable to decode %.*s, got %d\n", __func__,
+                              _name_len, _name, err);
+                       goto out_bad;
+               }
+               rde->name = oname.name;
+               rde->name_len = oname.len;
+
                /* inode */
                err = parse_reply_info_in(p, end, &rde->inode, features);
                if (err < 0)
@@ -581,15 +719,16 @@ bad:
  * parse extra results
  */
 static int parse_reply_info_extra(void **p, void *end,
-                                 struct ceph_mds_reply_info_parsed *info,
+                                 struct ceph_mds_request *req,
                                  u64 features, struct ceph_mds_session *s)
 {
+       struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
        u32 op = le32_to_cpu(info->head->op);
 
        if (op == CEPH_MDS_OP_GETFILELOCK)
                return parse_reply_info_filelock(p, end, info, features);
        else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
-               return parse_reply_info_readdir(p, end, info, features);
+               return parse_reply_info_readdir(p, end, req, features);
        else if (op == CEPH_MDS_OP_CREATE)
                return parse_reply_info_create(p, end, info, features, s);
        else if (op == CEPH_MDS_OP_GETVXATTR)
@@ -602,9 +741,9 @@ static int parse_reply_info_extra(void **p, void *end,
  * parse entire mds reply
  */
 static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
-                           struct ceph_mds_reply_info_parsed *info,
-                           u64 features)
+                           struct ceph_mds_request *req, u64 features)
 {
+       struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
        void *p, *end;
        u32 len;
        int err;
@@ -626,7 +765,7 @@ static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
        ceph_decode_32_safe(&p, end, len, bad);
        if (len > 0) {
                ceph_decode_need(&p, end, len, bad);
-               err = parse_reply_info_extra(&p, p+len, info, features, s);
+               err = parse_reply_info_extra(&p, p+len, req, features, s);
                if (err < 0)
                        goto out_bad;
        }
@@ -651,8 +790,21 @@ out_bad:
 
 static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
 {
+       int i;
+
+       kfree(info->diri.fscrypt_auth);
+       kfree(info->diri.fscrypt_file);
+       kfree(info->targeti.fscrypt_auth);
+       kfree(info->targeti.fscrypt_file);
        if (!info->dir_entries)
                return;
+
+       for (i = 0; i < info->dir_nr; i++) {
+               struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
+
+               kfree(rde->inode.fscrypt_auth);
+               kfree(rde->inode.fscrypt_file);
+       }
        free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
 }
 
@@ -945,6 +1097,7 @@ void ceph_mdsc_release_request(struct kref *kref)
                iput(req->r_parent);
        }
        iput(req->r_target_inode);
+       iput(req->r_new_inode);
        if (req->r_dentry)
                dput(req->r_dentry);
        if (req->r_old_dentry)
@@ -965,6 +1118,8 @@ void ceph_mdsc_release_request(struct kref *kref)
        put_cred(req->r_cred);
        if (req->r_pagelist)
                ceph_pagelist_release(req->r_pagelist);
+       kfree(req->r_fscrypt_auth);
+       kfree(req->r_altname);
        put_request_session(req);
        ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
        WARN_ON_ONCE(!list_empty(&req->r_wait));
@@ -2373,20 +2528,90 @@ static inline  u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
        return mdsc->oldest_tid;
 }
 
-/*
- * Build a dentry's path.  Allocate on heap; caller must kfree.  Based
- * on build_path_from_dentry in fs/cifs/dir.c.
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
+{
+       struct inode *dir = req->r_parent;
+       struct dentry *dentry = req->r_dentry;
+       u8 *cryptbuf = NULL;
+       u32 len = 0;
+       int ret = 0;
+
+       /* only encode if we have parent and dentry */
+       if (!dir || !dentry)
+               goto success;
+
+       /* No-op unless this is encrypted */
+       if (!IS_ENCRYPTED(dir))
+               goto success;
+
+       ret = ceph_fscrypt_prepare_readdir(dir);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       /* No key? Just ignore it. */
+       if (!fscrypt_has_encryption_key(dir))
+               goto success;
+
+       if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX,
+                                         &len)) {
+               WARN_ON_ONCE(1);
+               return ERR_PTR(-ENAMETOOLONG);
+       }
+
+       /* No need to append altname if name is short enough */
+       if (len <= CEPH_NOHASH_NAME_MAX) {
+               len = 0;
+               goto success;
+       }
+
+       cryptbuf = kmalloc(len, GFP_KERNEL);
+       if (!cryptbuf)
+               return ERR_PTR(-ENOMEM);
+
+       ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len);
+       if (ret) {
+               kfree(cryptbuf);
+               return ERR_PTR(ret);
+       }
+success:
+       *plen = len;
+       return cryptbuf;
+}
+#else
+static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
+{
+       *plen = 0;
+       return NULL;
+}
+#endif
+
+/**
+ * ceph_mdsc_build_path - build a path string to a given dentry
+ * @dentry: dentry to which path should be built
+ * @plen: returned length of string
+ * @pbase: returned base inode number
+ * @for_wire: is this path going to be sent to the MDS?
+ *
+ * Build a string that represents the path to the dentry. This is mostly called
+ * for two different purposes:
+ *
+ * 1) we need to build a path string to send to the MDS (for_wire == true)
+ * 2) we need a path string for local presentation (e.g. debugfs)
+ *    (for_wire == false)
  *
- * If @stop_on_nosnap, generate path relative to the first non-snapped
- * inode.
+ * The path is built in reverse, starting with the dentry. Walk back up toward
+ * the root, building the path until the first non-snapped inode is reached
+ * (for_wire) or the root inode is reached (!for_wire).
  *
  * Encode hidden .snap dirs as a double /, i.e.
  *   foo/.snap/bar -> foo//bar
  */
 char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
-                          int stop_on_nosnap)
+                          int for_wire)
 {
-       struct dentry *temp;
+       struct dentry *cur;
+       struct inode *inode;
        char *path;
        int pos;
        unsigned seq;
@@ -2403,34 +2628,72 @@ retry:
        path[pos] = '\0';
 
        seq = read_seqbegin(&rename_lock);
-       rcu_read_lock();
-       temp = dentry;
+       cur = dget(dentry);
        for (;;) {
-               struct inode *inode;
+               struct dentry *parent;
 
-               spin_lock(&temp->d_lock);
-               inode = d_inode(temp);
+               spin_lock(&cur->d_lock);
+               inode = d_inode(cur);
                if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
                        dout("build_path path+%d: %p SNAPDIR\n",
-                            pos, temp);
-               } else if (stop_on_nosnap && inode && dentry != temp &&
+                            pos, cur);
+                       spin_unlock(&cur->d_lock);
+                       parent = dget_parent(cur);
+               } else if (for_wire && inode && dentry != cur &&
                           ceph_snap(inode) == CEPH_NOSNAP) {
-                       spin_unlock(&temp->d_lock);
+                       spin_unlock(&cur->d_lock);
                        pos++; /* get rid of any prepended '/' */
                        break;
+               } else if (!for_wire || !IS_ENCRYPTED(d_inode(cur->d_parent))) {
+                       pos -= cur->d_name.len;
+                       if (pos < 0) {
+                               spin_unlock(&cur->d_lock);
+                               break;
+                       }
+                       memcpy(path + pos, cur->d_name.name, cur->d_name.len);
+                       spin_unlock(&cur->d_lock);
+                       parent = dget_parent(cur);
                } else {
-                       pos -= temp->d_name.len;
+                       int len, ret;
+                       char buf[NAME_MAX];
+
+                       /*
+                        * Proactively copy name into buf, in case we need to
+                        * present it as-is.
+                        */
+                       memcpy(buf, cur->d_name.name, cur->d_name.len);
+                       len = cur->d_name.len;
+                       spin_unlock(&cur->d_lock);
+                       parent = dget_parent(cur);
+
+                       ret = ceph_fscrypt_prepare_readdir(d_inode(parent));
+                       if (ret < 0) {
+                               dput(parent);
+                               dput(cur);
+                               return ERR_PTR(ret);
+                       }
+
+                       if (fscrypt_has_encryption_key(d_inode(parent))) {
+                               len = ceph_encode_encrypted_fname(d_inode(parent),
+                                                                 cur, buf);
+                               if (len < 0) {
+                                       dput(parent);
+                                       dput(cur);
+                                       return ERR_PTR(len);
+                               }
+                       }
+                       pos -= len;
                        if (pos < 0) {
-                               spin_unlock(&temp->d_lock);
+                               dput(parent);
                                break;
                        }
-                       memcpy(path + pos, temp->d_name.name, temp->d_name.len);
+                       memcpy(path + pos, buf, len);
                }
-               spin_unlock(&temp->d_lock);
-               temp = READ_ONCE(temp->d_parent);
+               dput(cur);
+               cur = parent;
 
                /* Are we at the root? */
-               if (IS_ROOT(temp))
+               if (IS_ROOT(cur))
                        break;
 
                /* Are we out of buffer? */
@@ -2439,8 +2702,9 @@ retry:
 
                path[pos] = '/';
        }
-       base = ceph_ino(d_inode(temp));
-       rcu_read_unlock();
+       inode = d_inode(cur);
+       base = inode ? ceph_ino(inode) : 0;
+       dput(cur);
 
        if (read_seqretry(&rename_lock, seq))
                goto retry;
@@ -2450,8 +2714,8 @@ retry:
                 * A rename didn't occur, but somehow we didn't end up where
                 * we thought we would. Throw a warning and try again.
                 */
-               pr_warn("build_path did not end path lookup where "
-                       "expected, pos is %d\n", pos);
+               pr_warn("build_path did not end path lookup where expected (pos = %d)\n",
+                       pos);
                goto retry;
        }
 
@@ -2471,7 +2735,8 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
        rcu_read_lock();
        if (!dir)
                dir = d_inode_rcu(dentry->d_parent);
-       if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) {
+       if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP &&
+           !IS_ENCRYPTED(dir)) {
                *pino = ceph_ino(dir);
                rcu_read_unlock();
                *ppath = dentry->d_name.name;
@@ -2539,8 +2804,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
        return r;
 }
 
-static void encode_timestamp_and_gids(void **p,
-                                     const struct ceph_mds_request *req)
+static void encode_mclientrequest_tail(void **p,
+                                      const struct ceph_mds_request *req)
 {
        struct ceph_timespec ts;
        int i;
@@ -2548,11 +2813,43 @@ static void encode_timestamp_and_gids(void **p,
        ceph_encode_timespec64(&ts, &req->r_stamp);
        ceph_encode_copy(p, &ts, sizeof(ts));
 
-       /* gid_list */
+       /* v4: gid_list */
        ceph_encode_32(p, req->r_cred->group_info->ngroups);
        for (i = 0; i < req->r_cred->group_info->ngroups; i++)
                ceph_encode_64(p, from_kgid(&init_user_ns,
                                            req->r_cred->group_info->gid[i]));
+
+       /* v5: altname */
+       ceph_encode_32(p, req->r_altname_len);
+       ceph_encode_copy(p, req->r_altname, req->r_altname_len);
+
+       /* v6: fscrypt_auth and fscrypt_file */
+       if (req->r_fscrypt_auth) {
+               u32 authlen = ceph_fscrypt_auth_len(req->r_fscrypt_auth);
+
+               ceph_encode_32(p, authlen);
+               ceph_encode_copy(p, req->r_fscrypt_auth, authlen);
+       } else {
+               ceph_encode_32(p, 0);
+       }
+       if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags)) {
+               ceph_encode_32(p, sizeof(__le64));
+               ceph_encode_64(p, req->r_fscrypt_file);
+       } else {
+               ceph_encode_32(p, 0);
+       }
+}
+
+static struct ceph_mds_request_head_legacy *
+find_legacy_request_head(void *p, u64 features)
+{
+       bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
+       struct ceph_mds_request_head_old *ohead;
+
+       if (legacy)
+               return (struct ceph_mds_request_head_legacy *)p;
+       ohead = (struct ceph_mds_request_head_old *)p;
+       return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid;
 }
 
 /*
@@ -2565,7 +2862,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
        int mds = session->s_mds;
        struct ceph_mds_client *mdsc = session->s_mdsc;
        struct ceph_msg *msg;
-       struct ceph_mds_request_head_old *head;
+       struct ceph_mds_request_head_legacy *lhead;
        const char *path1 = NULL;
        const char *path2 = NULL;
        u64 ino1 = 0, ino2 = 0;
@@ -2577,6 +2874,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
        void *p, *end;
        int ret;
        bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
+       bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
+                                    &session->s_features);
 
        ret = set_request_path_attr(req->r_inode, req->r_dentry,
                              req->r_parent, req->r_path1, req->r_ino1.ino,
@@ -2601,12 +2900,32 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
                goto out_free1;
        }
 
-       len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head);
-       len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
-               sizeof(struct ceph_timespec);
-       len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups);
+       req->r_altname = get_fscrypt_altname(req, &req->r_altname_len);
+       if (IS_ERR(req->r_altname)) {
+               msg = ERR_CAST(req->r_altname);
+               req->r_altname = NULL;
+               goto out_free2;
+       }
+
+       /*
+        * For old cephs without supporting the 32bit retry/fwd feature
+        * it will copy the raw memories directly when decoding the
+        * requests. While new cephs will decode the head depending the
+        * version member, so we need to make sure it will be compatible
+        * with them both.
+        */
+       if (legacy)
+               len = sizeof(struct ceph_mds_request_head_legacy);
+       else if (old_version)
+               len = sizeof(struct ceph_mds_request_head_old);
+       else
+               len = sizeof(struct ceph_mds_request_head);
 
-       /* calculate (max) length for cap releases */
+       /* filepaths */
+       len += 2 * (1 + sizeof(u32) + sizeof(u64));
+       len += pathlen1 + pathlen2;
+
+       /* cap releases */
        len += sizeof(struct ceph_mds_request_release) *
                (!!req->r_inode_drop + !!req->r_dentry_drop +
                 !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
@@ -2616,6 +2935,27 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
        if (req->r_old_dentry_drop)
                len += pathlen2;
 
+       /* MClientRequest tail */
+
+       /* req->r_stamp */
+       len += sizeof(struct ceph_timespec);
+
+       /* gid list */
+       len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups);
+
+       /* alternate name */
+       len += sizeof(u32) + req->r_altname_len;
+
+       /* fscrypt_auth */
+       len += sizeof(u32); // fscrypt_auth
+       if (req->r_fscrypt_auth)
+               len += ceph_fscrypt_auth_len(req->r_fscrypt_auth);
+
+       /* fscrypt_file */
+       len += sizeof(u32);
+       if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags))
+               len += sizeof(__le64);
+
        msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
        if (!msg) {
                msg = ERR_PTR(-ENOMEM);
@@ -2624,33 +2964,40 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 
        msg->hdr.tid = cpu_to_le64(req->r_tid);
 
+       lhead = find_legacy_request_head(msg->front.iov_base,
+                                        session->s_con.peer_features);
+
        /*
-        * The old ceph_mds_request_head didn't contain a version field, and
+        * The ceph_mds_request_head_legacy didn't contain a version field, and
         * one was added when we moved the message version from 3->4.
         */
        if (legacy) {
                msg->hdr.version = cpu_to_le16(3);
-               head = msg->front.iov_base;
-               p = msg->front.iov_base + sizeof(*head);
-       } else {
-               struct ceph_mds_request_head *new_head = msg->front.iov_base;
+               p = msg->front.iov_base + sizeof(*lhead);
+       } else if (old_version) {
+               struct ceph_mds_request_head_old *ohead = msg->front.iov_base;
 
                msg->hdr.version = cpu_to_le16(4);
-               new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
-               head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
-               p = msg->front.iov_base + sizeof(*new_head);
+               ohead->version = cpu_to_le16(1);
+               p = msg->front.iov_base + sizeof(*ohead);
+       } else {
+               struct ceph_mds_request_head *nhead = msg->front.iov_base;
+
+               msg->hdr.version = cpu_to_le16(6);
+               nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
+               p = msg->front.iov_base + sizeof(*nhead);
        }
 
        end = msg->front.iov_base + msg->front.iov_len;
 
-       head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
-       head->op = cpu_to_le32(req->r_op);
-       head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
-                                                req->r_cred->fsuid));
-       head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
-                                                req->r_cred->fsgid));
-       head->ino = cpu_to_le64(req->r_deleg_ino);
-       head->args = req->r_args;
+       lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
+       lhead->op = cpu_to_le32(req->r_op);
+       lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
+                                                 req->r_cred->fsuid));
+       lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
+                                                 req->r_cred->fsgid));
+       lhead->ino = cpu_to_le64(req->r_deleg_ino);
+       lhead->args = req->r_args;
 
        ceph_encode_filepath(&p, end, ino1, path1);
        ceph_encode_filepath(&p, end, ino2, path2);
@@ -2665,15 +3012,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
                      req->r_inode ? req->r_inode : d_inode(req->r_dentry),
                      mds, req->r_inode_drop, req->r_inode_unless,
                      req->r_op == CEPH_MDS_OP_READDIR);
-       if (req->r_dentry_drop)
-               releases += ceph_encode_dentry_release(&p, req->r_dentry,
+       if (req->r_dentry_drop) {
+               ret = ceph_encode_dentry_release(&p, req->r_dentry,
                                req->r_parent, mds, req->r_dentry_drop,
                                req->r_dentry_unless);
-       if (req->r_old_dentry_drop)
-               releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
+               if (ret < 0)
+                       goto out_err;
+               releases += ret;
+       }
+       if (req->r_old_dentry_drop) {
+               ret = ceph_encode_dentry_release(&p, req->r_old_dentry,
                                req->r_old_dentry_dir, mds,
                                req->r_old_dentry_drop,
                                req->r_old_dentry_unless);
+               if (ret < 0)
+                       goto out_err;
+               releases += ret;
+       }
        if (req->r_old_inode_drop)
                releases += ceph_encode_inode_release(&p,
                      d_inode(req->r_old_dentry),
@@ -2684,9 +3039,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
                p = msg->front.iov_base + req->r_request_release_offset;
        }
 
-       head->num_releases = cpu_to_le16(releases);
+       lhead->num_releases = cpu_to_le16(releases);
 
-       encode_timestamp_and_gids(&p, req);
+       encode_mclientrequest_tail(&p, req);
 
        if (WARN_ON_ONCE(p > end)) {
                ceph_msg_put(msg);
@@ -2715,6 +3070,10 @@ out_free1:
                ceph_mdsc_free_path((char *)path1, pathlen1);
 out:
        return msg;
+out_err:
+       ceph_msg_put(msg);
+       msg = ERR_PTR(ret);
+       goto out_free2;
 }
 
 /*
@@ -2731,18 +3090,6 @@ static void complete_request(struct ceph_mds_client *mdsc,
        complete_all(&req->r_completion);
 }
 
-static struct ceph_mds_request_head_old *
-find_old_request_head(void *p, u64 features)
-{
-       bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
-       struct ceph_mds_request_head *new_head;
-
-       if (legacy)
-               return (struct ceph_mds_request_head_old *)p;
-       new_head = (struct ceph_mds_request_head *)p;
-       return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
-}
-
 /*
  * called under mdsc->mutex
  */
@@ -2752,29 +3099,28 @@ static int __prepare_send_request(struct ceph_mds_session *session,
 {
        int mds = session->s_mds;
        struct ceph_mds_client *mdsc = session->s_mdsc;
-       struct ceph_mds_request_head_old *rhead;
+       struct ceph_mds_request_head_legacy *lhead;
+       struct ceph_mds_request_head *nhead;
        struct ceph_msg *msg;
-       int flags = 0, max_retry;
+       int flags = 0, old_max_retry;
+       bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
+                                    &session->s_features);
 
        /*
-        * The type of 'r_attempts' in kernel 'ceph_mds_request'
-        * is 'int', while in 'ceph_mds_request_head' the type of
-        * 'num_retry' is '__u8'. So in case the request retries
-        *  exceeding 256 times, the MDS will receive a incorrect
-        *  retry seq.
-        *
-        * In this case it's ususally a bug in MDS and continue
-        * retrying the request makes no sense.
-        *
-        * In future this could be fixed in ceph code, so avoid
-        * using the hardcode here.
+        * Avoid infinite retrying after overflow. The client will
+        * increase the retry count, and if the MDS is an old version
+        * we limit it to at most 256 retries.
         */
-       max_retry = sizeof_field(struct ceph_mds_request_head, num_retry);
-       max_retry = 1 << (max_retry * BITS_PER_BYTE);
-       if (req->r_attempts >= max_retry) {
-               pr_warn_ratelimited("%s request tid %llu seq overflow\n",
-                                   __func__, req->r_tid);
-               return -EMULTIHOP;
+       if (req->r_attempts) {
+              old_max_retry = sizeof_field(struct ceph_mds_request_head_old,
+                                           num_retry);
+              old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE);
+              if ((old_version && req->r_attempts >= old_max_retry) ||
+                  ((uint32_t)req->r_attempts >= U32_MAX)) {
+                       pr_warn_ratelimited("%s request tid %llu seq overflow\n",
+                                           __func__, req->r_tid);
+                       return -EMULTIHOP;
+              }
        }
 
        req->r_attempts++;
@@ -2800,23 +3146,27 @@ static int __prepare_send_request(struct ceph_mds_session *session,
                 * d_move mangles the src name.
                 */
                msg = req->r_request;
-               rhead = find_old_request_head(msg->front.iov_base,
-                                             session->s_con.peer_features);
+               lhead = find_legacy_request_head(msg->front.iov_base,
+                                                session->s_con.peer_features);
 
-               flags = le32_to_cpu(rhead->flags);
+               flags = le32_to_cpu(lhead->flags);
                flags |= CEPH_MDS_FLAG_REPLAY;
-               rhead->flags = cpu_to_le32(flags);
+               lhead->flags = cpu_to_le32(flags);
 
                if (req->r_target_inode)
-                       rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+                       lhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
 
-               rhead->num_retry = req->r_attempts - 1;
+               lhead->num_retry = req->r_attempts - 1;
+               if (!old_version) {
+                       nhead = (struct ceph_mds_request_head*)msg->front.iov_base;
+                       nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
+               }
 
                /* remove cap/dentry releases from message */
-               rhead->num_releases = 0;
+               lhead->num_releases = 0;
 
                p = msg->front.iov_base + req->r_request_release_offset;
-               encode_timestamp_and_gids(&p, req);
+               encode_mclientrequest_tail(&p, req);
 
                msg->front.iov_len = p - msg->front.iov_base;
                msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
@@ -2834,18 +3184,23 @@ static int __prepare_send_request(struct ceph_mds_session *session,
        }
        req->r_request = msg;
 
-       rhead = find_old_request_head(msg->front.iov_base,
-                                     session->s_con.peer_features);
-       rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
+       lhead = find_legacy_request_head(msg->front.iov_base,
+                                        session->s_con.peer_features);
+       lhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
        if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
                flags |= CEPH_MDS_FLAG_REPLAY;
        if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags))
                flags |= CEPH_MDS_FLAG_ASYNC;
        if (req->r_parent)
                flags |= CEPH_MDS_FLAG_WANT_DENTRY;
-       rhead->flags = cpu_to_le32(flags);
-       rhead->num_fwd = req->r_num_fwd;
-       rhead->num_retry = req->r_attempts - 1;
+       lhead->flags = cpu_to_le32(flags);
+       lhead->num_fwd = req->r_num_fwd;
+       lhead->num_retry = req->r_attempts - 1;
+       if (!old_version) {
+               nhead = (struct ceph_mds_request_head*)msg->front.iov_base;
+               nhead->ext_num_fwd = cpu_to_le32(req->r_num_fwd);
+               nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
+       }
 
        dout(" r_parent = %p\n", req->r_parent);
        return 0;
@@ -3348,22 +3703,35 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        }
 
        dout("handle_reply tid %lld result %d\n", tid, result);
-       rinfo = &req->r_reply_info;
        if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features))
-               err = parse_reply_info(session, msg, rinfo, (u64)-1);
+               err = parse_reply_info(session, msg, req, (u64)-1);
        else
-               err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
+               err = parse_reply_info(session, msg, req,
+                                      session->s_con.peer_features);
        mutex_unlock(&mdsc->mutex);
 
        /* Must find target inode outside of mutexes to avoid deadlocks */
+       rinfo = &req->r_reply_info;
        if ((err >= 0) && rinfo->head->is_target) {
-               struct inode *in;
+               struct inode *in = xchg(&req->r_new_inode, NULL);
                struct ceph_vino tvino = {
                        .ino  = le64_to_cpu(rinfo->targeti.in->ino),
                        .snap = le64_to_cpu(rinfo->targeti.in->snapid)
                };
 
-               in = ceph_get_inode(mdsc->fsc->sb, tvino);
+               /*
+                * If we ended up opening an existing inode, discard
+                * r_new_inode
+                */
+               if (req->r_op == CEPH_MDS_OP_CREATE &&
+                   !req->r_reply_info.has_create_ino) {
+                       /* This should never happen on an async create */
+                       WARN_ON_ONCE(req->r_deleg_ino);
+                       iput(in);
+                       in = NULL;
+               }
+
+               in = ceph_get_inode(mdsc->fsc->sb, tvino, in);
                if (IS_ERR(in)) {
                        err = PTR_ERR(in);
                        mutex_lock(&session->s_mutex);
@@ -3406,7 +3774,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
        if (err == 0) {
                if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
                                    req->r_op == CEPH_MDS_OP_LSSNAP))
-                       ceph_readdir_prepopulate(req, req->r_session);
+                       err = ceph_readdir_prepopulate(req, req->r_session);
        }
        current->journal_info = NULL;
        mutex_unlock(&req->r_fill_mutex);
@@ -3491,33 +3859,21 @@ static void handle_forward(struct ceph_mds_client *mdsc,
        if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
                dout("forward tid %llu aborted, unregistering\n", tid);
                __unregister_request(mdsc, req);
-       } else if (fwd_seq <= req->r_num_fwd) {
+       } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) {
                /*
-                * The type of 'num_fwd' in ceph 'MClientRequestForward'
-                * is 'int32_t', while in 'ceph_mds_request_head' the
-                * type is '__u8'. So in case the request bounces between
-                * MDSes exceeding 256 times, the client will get stuck.
-                *
-                * In this case it's ususally a bug in MDS and continue
-                * bouncing the request makes no sense.
+                * Avoid infinite retrying after overflow.
                 *
-                * In future this could be fixed in ceph code, so avoid
-                * using the hardcode here.
+                * The MDS will increase the fwd count and in client side
+                * if the num_fwd is less than the one saved in request
+                * that means the MDS is an old version and overflowed of
+                * 8 bits.
                 */
-               int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
-               max = 1 << (max * BITS_PER_BYTE);
-               if (req->r_num_fwd >= max) {
-                       mutex_lock(&req->r_fill_mutex);
-                       req->r_err = -EMULTIHOP;
-                       set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
-                       mutex_unlock(&req->r_fill_mutex);
-                       aborted = true;
-                       pr_warn_ratelimited("forward tid %llu seq overflow\n",
-                                           tid);
-               } else {
-                       dout("forward tid %llu to mds%d - old seq %d <= %d\n",
-                            tid, next_mds, req->r_num_fwd, fwd_seq);
-               }
+               mutex_lock(&req->r_fill_mutex);
+               req->r_err = -EMULTIHOP;
+               set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+               mutex_unlock(&req->r_fill_mutex);
+               aborted = true;
+               pr_warn_ratelimited("forward tid %llu seq overflow\n", tid);
        } else {
                /* resend. forward race not possible; mds would drop */
                dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
@@ -4550,6 +4906,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 
        dout("handle_lease from mds%d\n", mds);
 
+       if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+               return;
+
        /* decode */
        if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
                goto bad;
@@ -4568,8 +4927,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
             dname.len, dname.name);
 
        mutex_lock(&session->s_mutex);
-       inc_session_sequence(session);
-
        if (!inode) {
                dout("handle_lease no inode %llx\n", vino.ino);
                goto release;
@@ -4631,9 +4988,13 @@ release:
 out:
        mutex_unlock(&session->s_mutex);
        iput(inode);
+
+       ceph_dec_mds_stopping_blocker(mdsc);
        return;
 
 bad:
+       ceph_dec_mds_stopping_blocker(mdsc);
+
        pr_err("corrupt lease message\n");
        ceph_msg_dump(msg);
 }
@@ -4829,6 +5190,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        }
 
        init_completion(&mdsc->safe_umount_waiters);
+       spin_lock_init(&mdsc->stopping_lock);
+       atomic_set(&mdsc->stopping_blockers, 0);
+       init_completion(&mdsc->stopping_waiter);
        init_waitqueue_head(&mdsc->session_close_wq);
        INIT_LIST_HEAD(&mdsc->waiting_for_map);
        mdsc->quotarealms_inodes = RB_ROOT;
index 86d2965..5a3714b 100644 (file)
@@ -32,8 +32,9 @@ enum ceph_feature_type {
        CEPHFS_FEATURE_ALTERNATE_NAME,
        CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
        CEPHFS_FEATURE_OP_GETVXATTR,
+       CEPHFS_FEATURE_32BITS_RETRY_FWD,
 
-       CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_OP_GETVXATTR,
+       CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD,
 };
 
 #define CEPHFS_FEATURES_CLIENT_SUPPORTED {     \
@@ -44,8 +45,10 @@ enum ceph_feature_type {
        CEPHFS_FEATURE_MULTI_RECONNECT,         \
        CEPHFS_FEATURE_DELEG_INO,               \
        CEPHFS_FEATURE_METRIC_COLLECT,          \
+       CEPHFS_FEATURE_ALTERNATE_NAME,          \
        CEPHFS_FEATURE_NOTIFY_SESSION_STATE,    \
        CEPHFS_FEATURE_OP_GETVXATTR,            \
+       CEPHFS_FEATURE_32BITS_RETRY_FWD,        \
 }
 
 /*
@@ -86,13 +89,19 @@ struct ceph_mds_reply_info_in {
        s32 dir_pin;
        struct ceph_timespec btime;
        struct ceph_timespec snap_btime;
+       u8 *fscrypt_auth;
+       u8 *fscrypt_file;
+       u32 fscrypt_auth_len;
+       u32 fscrypt_file_len;
        u64 rsnaps;
        u64 change_attr;
 };
 
 struct ceph_mds_reply_dir_entry {
+       bool                          is_nokey;
        char                          *name;
        u32                           name_len;
+       u32                           raw_hash;
        struct ceph_mds_reply_lease   *lease;
        struct ceph_mds_reply_info_in inode;
        loff_t                        offset;
@@ -116,7 +125,9 @@ struct ceph_mds_reply_info_parsed {
        struct ceph_mds_reply_info_in diri, targeti;
        struct ceph_mds_reply_dirfrag *dirfrag;
        char                          *dname;
+       u8                            *altname;
        u32                           dname_len;
+       u32                           altname_len;
        struct ceph_mds_reply_lease   *dlease;
        struct ceph_mds_reply_xattr   xattr_info;
 
@@ -263,6 +274,7 @@ struct ceph_mds_request {
 
        struct inode *r_parent;             /* parent dir inode */
        struct inode *r_target_inode;       /* resulting inode */
+       struct inode *r_new_inode;          /* new inode (for creates) */
 
 #define CEPH_MDS_R_DIRECT_IS_HASH      (1) /* r_direct_hash is valid */
 #define CEPH_MDS_R_ABORTED             (2) /* call was aborted */
@@ -272,11 +284,19 @@ struct ceph_mds_request {
 #define CEPH_MDS_R_DID_PREPOPULATE     (6) /* prepopulated readdir */
 #define CEPH_MDS_R_PARENT_LOCKED       (7) /* is r_parent->i_rwsem wlocked? */
 #define CEPH_MDS_R_ASYNC               (8) /* async request */
+#define CEPH_MDS_R_FSCRYPT_FILE                (9) /* must marshal fscrypt_file field */
        unsigned long   r_req_flags;
 
        struct mutex r_fill_mutex;
 
        union ceph_mds_request_args r_args;
+
+       struct ceph_fscrypt_auth *r_fscrypt_auth;
+       u64     r_fscrypt_file;
+
+       u8 *r_altname;              /* fscrypt binary crypttext for long filenames */
+       u32 r_altname_len;          /* length of r_altname */
+
        int r_fmode;        /* file mode, if expecting cap */
        int r_request_release_offset;
        const struct cred *r_cred;
@@ -381,8 +401,9 @@ struct cap_wait {
 };
 
 enum {
-       CEPH_MDSC_STOPPING_BEGIN = 1,
-       CEPH_MDSC_STOPPING_FLUSHED = 2,
+       CEPH_MDSC_STOPPING_BEGIN = 1,
+       CEPH_MDSC_STOPPING_FLUSHING = 2,
+       CEPH_MDSC_STOPPING_FLUSHED = 3,
 };
 
 /*
@@ -401,7 +422,11 @@ struct ceph_mds_client {
        struct ceph_mds_session **sessions;    /* NULL for mds if no session */
        atomic_t                num_sessions;
        int                     max_sessions;  /* len of sessions array */
-       int                     stopping;      /* true if shutting down */
+
+       spinlock_t              stopping_lock;  /* serializes stopping vs. stopping_blockers */
+       int                     stopping;      /* the stage of shutting down */
+       atomic_t                stopping_blockers;
+       struct completion       stopping_waiter;
 
        atomic64_t              quotarealms_count; /* # realms with quota */
        /*
@@ -557,7 +582,7 @@ static inline void ceph_mdsc_free_path(char *path, int len)
 }
 
 extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
-                                 int stop_on_nosnap);
+                                 int for_wire);
 
 extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
 extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
index 64592ad..f7fcf7f 100644 (file)
@@ -47,25 +47,23 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
        struct inode *inode;
        struct ceph_inode_info *ci;
 
+       if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+               return;
+
        if (msg->front.iov_len < sizeof(*h)) {
                pr_err("%s corrupt message mds%d len %d\n", __func__,
                       session->s_mds, (int)msg->front.iov_len);
                ceph_msg_dump(msg);
-               return;
+               goto out;
        }
 
-       /* increment msg sequence number */
-       mutex_lock(&session->s_mutex);
-       inc_session_sequence(session);
-       mutex_unlock(&session->s_mutex);
-
        /* lookup inode */
        vino.ino = le64_to_cpu(h->ino);
        vino.snap = CEPH_NOSNAP;
        inode = ceph_find_inode(sb, vino);
        if (!inode) {
                pr_warn("Failed to find inode %llu\n", vino.ino);
-               return;
+               goto out;
        }
        ci = ceph_inode(inode);
 
@@ -78,6 +76,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
        spin_unlock(&ci->i_ceph_lock);
 
        iput(inode);
+out:
+       ceph_dec_mds_stopping_blocker(mdsc);
 }
 
 static struct ceph_quotarealm_inode *
index c9920ad..813f21a 100644 (file)
@@ -1015,6 +1015,9 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
        int locked_rwsem = 0;
        bool close_sessions = false;
 
+       if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+               return;
+
        /* decode */
        if (msg->front.iov_len < sizeof(*h))
                goto bad;
@@ -1030,10 +1033,6 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
        dout("%s from mds%d op %s split %llx tracelen %d\n", __func__,
             mds, ceph_snap_op_name(op), split, trace_len);
 
-       mutex_lock(&session->s_mutex);
-       inc_session_sequence(session);
-       mutex_unlock(&session->s_mutex);
-
        down_write(&mdsc->snap_rwsem);
        locked_rwsem = 1;
 
@@ -1151,6 +1150,7 @@ skip_inode:
        up_write(&mdsc->snap_rwsem);
 
        flush_snaps(mdsc);
+       ceph_dec_mds_stopping_blocker(mdsc);
        return;
 
 bad:
@@ -1160,6 +1160,8 @@ out:
        if (locked_rwsem)
                up_write(&mdsc->snap_rwsem);
 
+       ceph_dec_mds_stopping_blocker(mdsc);
+
        if (close_sessions)
                ceph_mdsc_close_sessions(mdsc);
        return;
index a5f5201..2d7f5a8 100644 (file)
@@ -20,6 +20,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "crypto.h"
 
 #include <linux/ceph/ceph_features.h>
 #include <linux/ceph/decode.h>
@@ -46,6 +47,7 @@ static void ceph_put_super(struct super_block *s)
        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
 
        dout("put_super\n");
+       ceph_fscrypt_free_dummy_policy(fsc);
        ceph_mdsc_close_sessions(fsc->mdsc);
 }
 
@@ -151,6 +153,7 @@ enum {
        Opt_recover_session,
        Opt_source,
        Opt_mon_addr,
+       Opt_test_dummy_encryption,
        /* string args above */
        Opt_dirstat,
        Opt_rbytes,
@@ -165,6 +168,7 @@ enum {
        Opt_copyfrom,
        Opt_wsync,
        Opt_pagecache,
+       Opt_sparseread,
 };
 
 enum ceph_recover_session_mode {
@@ -192,6 +196,7 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
        fsparam_string  ("fsc",                         Opt_fscache), // fsc=...
        fsparam_flag_no ("ino32",                       Opt_ino32),
        fsparam_string  ("mds_namespace",               Opt_mds_namespace),
+       fsparam_string  ("mon_addr",                    Opt_mon_addr),
        fsparam_flag_no ("poolperm",                    Opt_poolperm),
        fsparam_flag_no ("quotadf",                     Opt_quotadf),
        fsparam_u32     ("rasize",                      Opt_rasize),
@@ -203,10 +208,12 @@ static const struct fs_parameter_spec ceph_mount_parameters[] = {
        fsparam_u32     ("rsize",                       Opt_rsize),
        fsparam_string  ("snapdirname",                 Opt_snapdirname),
        fsparam_string  ("source",                      Opt_source),
-       fsparam_string  ("mon_addr",                    Opt_mon_addr),
+       fsparam_flag    ("test_dummy_encryption",       Opt_test_dummy_encryption),
+       fsparam_string  ("test_dummy_encryption",       Opt_test_dummy_encryption),
        fsparam_u32     ("wsize",                       Opt_wsize),
        fsparam_flag_no ("wsync",                       Opt_wsync),
        fsparam_flag_no ("pagecache",                   Opt_pagecache),
+       fsparam_flag_no ("sparseread",                  Opt_sparseread),
        {}
 };
 
@@ -576,6 +583,29 @@ static int ceph_parse_mount_param(struct fs_context *fc,
                else
                        fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
                break;
+       case Opt_sparseread:
+               if (result.negated)
+                       fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD;
+               else
+                       fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD;
+               break;
+       case Opt_test_dummy_encryption:
+#ifdef CONFIG_FS_ENCRYPTION
+               fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy);
+               ret = fscrypt_parse_test_dummy_encryption(param,
+                                               &fsopt->dummy_enc_policy);
+               if (ret == -EINVAL) {
+                       warnfc(fc, "Value of option \"%s\" is unrecognized",
+                              param->key);
+               } else if (ret == -EEXIST) {
+                       warnfc(fc, "Conflicting test_dummy_encryption options");
+                       ret = -EINVAL;
+               }
+#else
+               warnfc(fc,
+                      "FS encryption not supported: test_dummy_encryption mount option ignored");
+#endif
+               break;
        default:
                BUG();
        }
@@ -596,6 +626,7 @@ static void destroy_mount_options(struct ceph_mount_options *args)
        kfree(args->server_path);
        kfree(args->fscache_uniq);
        kfree(args->mon_addr);
+       fscrypt_free_dummy_policy(&args->dummy_enc_policy);
        kfree(args);
 }
 
@@ -710,9 +741,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 
        if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
                seq_puts(m, ",wsync");
-
        if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
                seq_puts(m, ",nopagecache");
+       if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
+               seq_puts(m, ",sparseread");
+
+       fscrypt_show_test_dummy_encryption(m, ',', root->d_sb);
 
        if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
                seq_printf(m, ",wsize=%u", fsopt->wsize);
@@ -1052,6 +1086,50 @@ out:
        return root;
 }
 
+#ifdef CONFIG_FS_ENCRYPTION
+static int ceph_apply_test_dummy_encryption(struct super_block *sb,
+                                           struct fs_context *fc,
+                                           struct ceph_mount_options *fsopt)
+{
+       struct ceph_fs_client *fsc = sb->s_fs_info;
+
+       if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy))
+               return 0;
+
+       /* No changing encryption context on remount. */
+       if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
+           !fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
+               if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
+                                                &fsc->fsc_dummy_enc_policy))
+                       return 0;
+               errorfc(fc, "Can't set test_dummy_encryption on remount");
+               return -EINVAL;
+       }
+
+       /* Also make sure fsopt doesn't contain a conflicting value. */
+       if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
+               if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
+                                                &fsc->fsc_dummy_enc_policy))
+                       return 0;
+               errorfc(fc, "Conflicting test_dummy_encryption options");
+               return -EINVAL;
+       }
+
+       fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy;
+       memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy));
+
+       warnfc(fc, "test_dummy_encryption mode enabled");
+       return 0;
+}
+#else
+static int ceph_apply_test_dummy_encryption(struct super_block *sb,
+                                           struct fs_context *fc,
+                                           struct ceph_mount_options *fsopt)
+{
+       return 0;
+}
+#endif
+
 /*
  * mount: join the ceph cluster, and open root directory.
  */
@@ -1080,6 +1158,11 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
                                goto out;
                }
 
+               err = ceph_apply_test_dummy_encryption(fsc->sb, fc,
+                                                      fsc->mount_options);
+               if (err)
+                       goto out;
+
                dout("mount opening path '%s'\n", path);
 
                ceph_fs_debugfs_init(fsc);
@@ -1101,6 +1184,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 
 out:
        mutex_unlock(&fsc->client->mount_mutex);
+       ceph_fscrypt_free_dummy_policy(fsc);
        return ERR_PTR(err);
 }
 
@@ -1126,6 +1210,8 @@ static int ceph_set_super(struct super_block *s, struct fs_context *fc)
        s->s_time_max = U32_MAX;
        s->s_flags |= SB_NODIRATIME | SB_NOATIME;
 
+       ceph_fscrypt_set_ops(s);
+
        ret = set_anon_super_fc(s, fc);
        if (ret != 0)
                fsc->sb = NULL;
@@ -1287,15 +1373,26 @@ static void ceph_free_fc(struct fs_context *fc)
 
 static int ceph_reconfigure_fc(struct fs_context *fc)
 {
+       int err;
        struct ceph_parse_opts_ctx *pctx = fc->fs_private;
        struct ceph_mount_options *fsopt = pctx->opts;
-       struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb);
+       struct super_block *sb = fc->root->d_sb;
+       struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+       err = ceph_apply_test_dummy_encryption(sb, fc, fsopt);
+       if (err)
+               return err;
 
        if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
                ceph_set_mount_opt(fsc, ASYNC_DIROPS);
        else
                ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
 
+       if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
+               ceph_set_mount_opt(fsc, SPARSEREAD);
+       else
+               ceph_clear_mount_opt(fsc, SPARSEREAD);
+
        if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
                kfree(fsc->mount_options->mon_addr);
                fsc->mount_options->mon_addr = fsopt->mon_addr;
@@ -1303,7 +1400,7 @@ static int ceph_reconfigure_fc(struct fs_context *fc)
                pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
        }
 
-       sync_filesystem(fc->root->d_sb);
+       sync_filesystem(sb);
        return 0;
 }
 
@@ -1365,25 +1462,101 @@ nomem:
        return -ENOMEM;
 }
 
+/*
+ * Return true if it successfully increases the blocker counter,
+ * or false if the mdsc has reached the flushing or flushed stage.
+ */
+static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       spin_lock(&mdsc->stopping_lock);
+       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
+               spin_unlock(&mdsc->stopping_lock);
+               return false;
+       }
+       atomic_inc(&mdsc->stopping_blockers);
+       spin_unlock(&mdsc->stopping_lock);
+       return true;
+}
+
+static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       spin_lock(&mdsc->stopping_lock);
+       if (!atomic_dec_return(&mdsc->stopping_blockers) &&
+           mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
+               complete_all(&mdsc->stopping_waiter);
+       spin_unlock(&mdsc->stopping_lock);
+}
+
+/* For metadata IO requests */
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+                                  struct ceph_mds_session *session)
+{
+       mutex_lock(&session->s_mutex);
+       inc_session_sequence(session);
+       mutex_unlock(&session->s_mutex);
+
+       return __inc_stopping_blocker(mdsc);
+}
+
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       __dec_stopping_blocker(mdsc);
+}
+
+/* For data IO requests */
+bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       return __inc_stopping_blocker(mdsc);
+}
+
+void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+       __dec_stopping_blocker(mdsc);
+}
+
 static void ceph_kill_sb(struct super_block *s)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+       bool wait;
 
        dout("kill_sb %p\n", s);
 
-       ceph_mdsc_pre_umount(fsc->mdsc);
+       ceph_mdsc_pre_umount(mdsc);
        flush_fs_workqueues(fsc);
 
        /*
         * Though the kill_anon_super() will finally trigger the
-        * sync_filesystem() anyway, we still need to do it here
-        * and then bump the stage of shutdown to stop the work
-        * queue as earlier as possible.
+        * sync_filesystem() anyway, we still need to do it here and
+        * then bump the stage of shutdown. This will allow us to
+        * drop any further messages from MDSs, which would increase
+        * the inodes' i_count reference counters but make no sense
+        * any more.
+        *
+        * Without this, evicting the inodes may fail in
+        * kill_anon_super(), which will trigger a warning when
+        * destroying the fscrypt keyring and then possibly trigger
+        * a further crash in ceph module when the iput() tries to
+        * evict the inodes later.
         */
        sync_filesystem(s);
 
-       fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+       spin_lock(&mdsc->stopping_lock);
+       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
+       wait = !!atomic_read(&mdsc->stopping_blockers);
+       spin_unlock(&mdsc->stopping_lock);
+
+       if (wait && atomic_read(&mdsc->stopping_blockers)) {
+               long timeleft = wait_for_completion_killable_timeout(
+                                       &mdsc->stopping_waiter,
+                                       fsc->client->options->mount_timeout);
+               if (!timeleft) /* timed out */
+                       pr_warn("umount timed out, %ld\n", timeleft);
+               else if (timeleft < 0) /* killed */
+                       pr_warn("umount was killed, %ld\n", timeleft);
+       }
 
+       mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
        kill_anon_super(s);
 
        fsc->client->extra_mon_dispatch = NULL;
index 3bfddf3..51c7f2b 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/hashtable.h>
 
 #include <linux/ceph/libceph.h>
+#include "crypto.h"
 
 /* large granularity for statfs utilization stats to facilitate
  * large volume sizes on 32-bit machines. */
@@ -42,6 +43,7 @@
 #define CEPH_MOUNT_OPT_NOCOPYFROM      (1<<14) /* don't use RADOS 'copy-from' op */
 #define CEPH_MOUNT_OPT_ASYNC_DIROPS    (1<<15) /* allow async directory ops */
 #define CEPH_MOUNT_OPT_NOPAGECACHE     (1<<16) /* bypass pagecache altogether */
+#define CEPH_MOUNT_OPT_SPARSEREAD      (1<<17) /* always do sparse reads */
 
 #define CEPH_MOUNT_OPT_DEFAULT                 \
        (CEPH_MOUNT_OPT_DCACHE |                \
@@ -98,6 +100,7 @@ struct ceph_mount_options {
        char *server_path;    /* default NULL (means "/") */
        char *fscache_uniq;   /* default NULL */
        char *mon_addr;
+       struct fscrypt_dummy_policy dummy_enc_policy;
 };
 
 /* mount state */
@@ -154,9 +157,11 @@ struct ceph_fs_client {
 #ifdef CONFIG_CEPH_FSCACHE
        struct fscache_volume *fscache;
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+       struct fscrypt_dummy_policy fsc_dummy_enc_policy;
+#endif
 };
 
-
 /*
  * File i/o capability.  This tracks shared state with the metadata
  * server that allows us to cache or writeback attributes or to read
@@ -419,6 +424,11 @@ struct ceph_inode_info {
        u32 i_truncate_seq;        /* last truncate to smaller size */
        u64 i_truncate_size;       /*  and the size we last truncated down to */
        int i_truncate_pending;    /*  still need to call vmtruncate */
+       /*
+        * For the non-fscrypt case this equals i_truncate_size;
+        * otherwise it equals fscrypt_file_size.
+        */
+       u64 i_truncate_pagecache_size;
 
        u64 i_max_size;            /* max file size authorized by mds */
        u64 i_reported_size; /* (max_)size reported to or requested of mds */
@@ -449,6 +459,13 @@ struct ceph_inode_info {
 
        struct work_struct i_work;
        unsigned long  i_work_mask;
+
+#ifdef CONFIG_FS_ENCRYPTION
+       u32 fscrypt_auth_len;
+       u32 fscrypt_file_len;
+       u8 *fscrypt_auth;
+       u8 *fscrypt_file;
+#endif
 };
 
 struct ceph_netfs_request_data {
@@ -998,6 +1015,7 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci)
 /* inode.c */
 struct ceph_mds_reply_info_in;
 struct ceph_mds_reply_dirfrag;
+struct ceph_acl_sec_ctx;
 
 extern const struct inode_operations ceph_file_iops;
 
@@ -1005,8 +1023,14 @@ extern struct inode *ceph_alloc_inode(struct super_block *sb);
 extern void ceph_evict_inode(struct inode *inode);
 extern void ceph_free_inode(struct inode *inode);
 
+struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
+                            umode_t *mode, struct ceph_acl_sec_ctx *as_ctx);
+void ceph_as_ctx_to_req(struct ceph_mds_request *req,
+                       struct ceph_acl_sec_ctx *as_ctx);
+
 extern struct inode *ceph_get_inode(struct super_block *sb,
-                                   struct ceph_vino vino);
+                                   struct ceph_vino vino,
+                                   struct inode *newino);
 extern struct inode *ceph_get_snapdir(struct inode *parent);
 extern int ceph_fill_file_size(struct inode *inode, int issued,
                               u32 truncate_seq, u64 truncate_size, u64 size);
@@ -1065,7 +1089,13 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
 }
 extern int ceph_permission(struct mnt_idmap *idmap,
                           struct inode *inode, int mask);
-extern int __ceph_setattr(struct inode *inode, struct iattr *attr);
+
+struct ceph_iattr {
+       struct ceph_fscrypt_auth        *fscrypt_auth;
+};
+
+extern int __ceph_setattr(struct inode *inode, struct iattr *attr,
+                         struct ceph_iattr *cia);
 extern int ceph_setattr(struct mnt_idmap *idmap,
                        struct dentry *dentry, struct iattr *attr);
 extern int ceph_getattr(struct mnt_idmap *idmap,
@@ -1100,6 +1130,9 @@ struct ceph_acl_sec_ctx {
        void *sec_ctx;
        u32 sec_ctxlen;
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+       struct ceph_fscrypt_auth *fscrypt_auth;
+#endif
        struct ceph_pagelist *pagelist;
 };
 
@@ -1237,6 +1270,8 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
                                      struct inode *dir,
                                      int mds, int drop, int unless);
 
+extern int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi,
+                          int need, int want, loff_t endoff, int *got);
 extern int ceph_get_caps(struct file *filp, int need, int want,
                         loff_t endoff, int *got);
 extern int ceph_try_get_caps(struct inode *inode,
@@ -1272,6 +1307,9 @@ extern int ceph_renew_caps(struct inode *inode, int fmode);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
                            struct file *file, unsigned flags, umode_t mode);
+extern ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+                               struct iov_iter *to, int *retry_op,
+                               u64 *last_objver);
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
                                  char *data, size_t len);
@@ -1375,4 +1413,9 @@ extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
                                     struct kstatfs *buf);
 extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
 
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+                              struct ceph_mds_session *session);
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc);
+bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc);
+void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc);
 #endif /* _FS_CEPH_SUPER_H */
index 1cbd84c..0deae4a 100644 (file)
@@ -352,6 +352,24 @@ static ssize_t ceph_vxattrcb_auth_mds(struct ceph_inode_info *ci,
        return ret;
 }
 
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static bool ceph_vxattrcb_fscrypt_auth_exists(struct ceph_inode_info *ci)
+{
+       return ci->fscrypt_auth_len;
+}
+
+static ssize_t ceph_vxattrcb_fscrypt_auth(struct ceph_inode_info *ci,
+                                         char *val, size_t size)
+{
+       if (size) {
+               if (size < ci->fscrypt_auth_len)
+                       return -ERANGE;
+               memcpy(val, ci->fscrypt_auth, ci->fscrypt_auth_len);
+       }
+       return ci->fscrypt_auth_len;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
 #define CEPH_XATTR_NAME(_type, _name)  XATTR_CEPH_PREFIX #_type "." #_name
 #define CEPH_XATTR_NAME2(_type, _name, _name2) \
        XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
@@ -500,6 +518,15 @@ static struct ceph_vxattr ceph_common_vxattrs[] = {
                .exists_cb = NULL,
                .flags = VXATTR_FLAG_READONLY,
        },
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+       {
+               .name = "ceph.fscrypt.auth",
+               .name_size = sizeof("ceph.fscrypt.auth"),
+               .getxattr_cb = ceph_vxattrcb_fscrypt_auth,
+               .exists_cb = ceph_vxattrcb_fscrypt_auth_exists,
+               .flags = VXATTR_FLAG_READONLY,
+       },
+#endif /* CONFIG_FS_ENCRYPTION */
        { .name = NULL, 0 }     /* Required table terminator */
 };
 
@@ -1408,6 +1435,9 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx)
 #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
        security_release_secctx(as_ctx->sec_ctx, as_ctx->sec_ctxlen);
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+       kfree(as_ctx->fscrypt_auth);
+#endif
        if (as_ctx->pagelist)
                ceph_pagelist_release(as_ctx->pagelist);
 }
index 881524b..d707e69 100644 (file)
@@ -92,7 +92,7 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time)
 /*
  * Calculate the time in jiffies until a dentry/attributes are valid
  */
-static u64 time_to_jiffies(u64 sec, u32 nsec)
+u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
 {
        if (sec || nsec) {
                struct timespec64 ts = {
@@ -112,17 +112,7 @@ static u64 time_to_jiffies(u64 sec, u32 nsec)
 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
 {
        fuse_dentry_settime(entry,
-               time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
-}
-
-static u64 attr_timeout(struct fuse_attr_out *o)
-{
-       return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
-}
-
-u64 entry_attr_timeout(struct fuse_entry_out *o)
-{
-       return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+               fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
 }
 
 void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
@@ -265,8 +255,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                        goto invalid;
 
                forget_all_cached_acls(inode);
-               fuse_change_attributes(inode, &outarg.attr,
-                                      entry_attr_timeout(&outarg),
+               fuse_change_attributes(inode, &outarg.attr, NULL,
+                                      ATTR_TIMEOUT(&outarg),
                                       attr_version);
                fuse_change_entry_timeout(entry, &outarg);
        } else if (inode) {
@@ -360,10 +350,14 @@ int fuse_valid_type(int m)
                S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
 }
 
+static bool fuse_valid_size(u64 size)
+{
+       return size <= LLONG_MAX;
+}
+
 bool fuse_invalid_attr(struct fuse_attr *attr)
 {
-       return !fuse_valid_type(attr->mode) ||
-               attr->size > LLONG_MAX;
+       return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
 }
 
 int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
@@ -399,7 +393,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
                goto out_put_forget;
 
        *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
-                          &outarg->attr, entry_attr_timeout(outarg),
+                          &outarg->attr, ATTR_TIMEOUT(outarg),
                           attr_version);
        err = -ENOMEM;
        if (!*inode) {
@@ -686,7 +680,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        ff->nodeid = outentry.nodeid;
        ff->open_flags = outopen.open_flags;
        inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
-                         &outentry.attr, entry_attr_timeout(&outentry), 0);
+                         &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
        if (!inode) {
                flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
                fuse_sync_release(NULL, ff, flags);
@@ -755,7 +749,8 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
        if (err == -ENOSYS) {
                fc->no_create = 1;
                goto mknod;
-       }
+       } else if (err == -EEXIST)
+               fuse_invalidate_entry(entry);
 out_dput:
        dput(res);
        return err;
@@ -813,7 +808,7 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
                goto out_put_forget_req;
 
        inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
-                         &outarg.attr, entry_attr_timeout(&outarg), 0);
+                         &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
        if (!inode) {
                fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
                return -ENOMEM;
@@ -835,6 +830,8 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
        return 0;
 
  out_put_forget_req:
+       if (err == -EEXIST)
+               fuse_invalidate_entry(entry);
        kfree(forget);
        return err;
 }
@@ -986,7 +983,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
        if (!err) {
                fuse_dir_changed(dir);
                fuse_entry_unlinked(entry);
-       } else if (err == -EINTR)
+       } else if (err == -EINTR || err == -ENOENT)
                fuse_invalidate_entry(entry);
        return err;
 }
@@ -1009,7 +1006,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
        if (!err) {
                fuse_dir_changed(dir);
                fuse_entry_unlinked(entry);
-       } else if (err == -EINTR)
+       } else if (err == -EINTR || err == -ENOENT)
                fuse_invalidate_entry(entry);
        return err;
 }
@@ -1050,7 +1047,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
                /* newent will end up negative */
                if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
                        fuse_entry_unlinked(newent);
-       } else if (err == -EINTR) {
+       } else if (err == -EINTR || err == -ENOENT) {
                /* If request was interrupted, DEITY only knows if the
                   rename actually took place.  If the invalidation
                   fails (e.g. some process has CWD under the renamed
@@ -1153,6 +1150,87 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
        stat->blksize = 1 << blkbits;
 }
 
+static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
+{
+       memset(attr, 0, sizeof(*attr));
+       attr->ino = sx->ino;
+       attr->size = sx->size;
+       attr->blocks = sx->blocks;
+       attr->atime = sx->atime.tv_sec;
+       attr->mtime = sx->mtime.tv_sec;
+       attr->ctime = sx->ctime.tv_sec;
+       attr->atimensec = sx->atime.tv_nsec;
+       attr->mtimensec = sx->mtime.tv_nsec;
+       attr->ctimensec = sx->ctime.tv_nsec;
+       attr->mode = sx->mode;
+       attr->nlink = sx->nlink;
+       attr->uid = sx->uid;
+       attr->gid = sx->gid;
+       attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
+       attr->blksize = sx->blksize;
+}
+
+static int fuse_do_statx(struct inode *inode, struct file *file,
+                        struct kstat *stat)
+{
+       int err;
+       struct fuse_attr attr;
+       struct fuse_statx *sx;
+       struct fuse_statx_in inarg;
+       struct fuse_statx_out outarg;
+       struct fuse_mount *fm = get_fuse_mount(inode);
+       u64 attr_version = fuse_get_attr_version(fm->fc);
+       FUSE_ARGS(args);
+
+       memset(&inarg, 0, sizeof(inarg));
+       memset(&outarg, 0, sizeof(outarg));
+       /* Directories have separate file-handle space */
+       if (file && S_ISREG(inode->i_mode)) {
+               struct fuse_file *ff = file->private_data;
+
+               inarg.getattr_flags |= FUSE_GETATTR_FH;
+               inarg.fh = ff->fh;
+       }
+       /* For now leave sync hints as the default, request all stats. */
+       inarg.sx_flags = 0;
+       inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
+       args.opcode = FUSE_STATX;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.out_numargs = 1;
+       args.out_args[0].size = sizeof(outarg);
+       args.out_args[0].value = &outarg;
+       err = fuse_simple_request(fm, &args);
+       if (err)
+               return err;
+
+       sx = &outarg.stat;
+       if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
+           ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
+                                        inode_wrong_type(inode, sx->mode)))) {
+               make_bad_inode(inode);
+               return -EIO;
+       }
+
+       fuse_statx_to_attr(&outarg.stat, &attr);
+       if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
+               fuse_change_attributes(inode, &attr, &outarg.stat,
+                                      ATTR_TIMEOUT(&outarg), attr_version);
+       }
+
+       if (stat) {
+               stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
+               stat->btime.tv_sec = sx->btime.tv_sec;
+               stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
+               fuse_fillattr(inode, &attr, stat);
+               stat->result_mask |= STATX_TYPE;
+       }
+
+       return 0;
+}
+
 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
                           struct file *file)
 {
@@ -1189,8 +1267,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
                        fuse_make_bad(inode);
                        err = -EIO;
                } else {
-                       fuse_change_attributes(inode, &outarg.attr,
-                                              attr_timeout(&outarg),
+                       fuse_change_attributes(inode, &outarg.attr, NULL,
+                                              ATTR_TIMEOUT(&outarg),
                                               attr_version);
                        if (stat)
                                fuse_fillattr(inode, &outarg.attr, stat);
@@ -1204,12 +1282,22 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
                                unsigned int flags)
 {
        struct fuse_inode *fi = get_fuse_inode(inode);
+       struct fuse_conn *fc = get_fuse_conn(inode);
        int err = 0;
        bool sync;
        u32 inval_mask = READ_ONCE(fi->inval_mask);
        u32 cache_mask = fuse_get_cache_mask(inode);
 
-       if (flags & AT_STATX_FORCE_SYNC)
+
+       /* FUSE only supports basic stats and possibly btime */
+       request_mask &= STATX_BASIC_STATS | STATX_BTIME;
+retry:
+       if (fc->no_statx)
+               request_mask &= STATX_BASIC_STATS;
+
+       if (!request_mask)
+               sync = false;
+       else if (flags & AT_STATX_FORCE_SYNC)
                sync = true;
        else if (flags & AT_STATX_DONT_SYNC)
                sync = false;
@@ -1220,11 +1308,24 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
 
        if (sync) {
                forget_all_cached_acls(inode);
-               err = fuse_do_getattr(inode, stat, file);
+               /* Try statx if BTIME is requested */
+               if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
+                       err = fuse_do_statx(inode, file, stat);
+                       if (err == -ENOSYS) {
+                               fc->no_statx = 1;
+                               goto retry;
+                       }
+               } else {
+                       err = fuse_do_getattr(inode, stat, file);
+               }
        } else if (stat) {
                generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
                stat->mode = fi->orig_i_mode;
                stat->ino = fi->orig_ino;
+               if (test_bit(FUSE_I_BTIME, &fi->state)) {
+                       stat->btime = fi->i_btime;
+                       stat->result_mask |= STATX_BTIME;
+               }
        }
 
        return err;
@@ -1861,8 +1962,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
                /* FIXME: clear I_DIRTY_SYNC? */
        }
 
-       fuse_change_attributes_common(inode, &outarg.attr,
-                                     attr_timeout(&outarg),
+       fuse_change_attributes_common(inode, &outarg.attr, NULL,
+                                     ATTR_TIMEOUT(&outarg),
                                      fuse_get_cache_mask(inode));
        oldsize = inode->i_size;
        /* see the comment in fuse_change_attributes() */
index bc41152..1cdb632 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/uio.h>
 #include <linux/fs.h>
 #include <linux/filelock.h>
-#include <linux/file.h>
 
 static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
                          unsigned int open_flags, int opcode,
@@ -479,36 +478,48 @@ static void fuse_sync_writes(struct inode *inode)
        fuse_release_nowrite(inode);
 }
 
-struct fuse_flush_args {
-       struct fuse_args args;
-       struct fuse_flush_in inarg;
-       struct work_struct work;
-       struct file *file;
-};
-
-static int fuse_do_flush(struct fuse_flush_args *fa)
+static int fuse_flush(struct file *file, fl_owner_t id)
 {
-       int err;
-       struct inode *inode = file_inode(fa->file);
+       struct inode *inode = file_inode(file);
        struct fuse_mount *fm = get_fuse_mount(inode);
+       struct fuse_file *ff = file->private_data;
+       struct fuse_flush_in inarg;
+       FUSE_ARGS(args);
+       int err;
+
+       if (fuse_is_bad(inode))
+               return -EIO;
+
+       if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
+               return 0;
 
        err = write_inode_now(inode, 1);
        if (err)
-               goto out;
+               return err;
 
        inode_lock(inode);
        fuse_sync_writes(inode);
        inode_unlock(inode);
 
-       err = filemap_check_errors(fa->file->f_mapping);
+       err = filemap_check_errors(file->f_mapping);
        if (err)
-               goto out;
+               return err;
 
        err = 0;
        if (fm->fc->no_flush)
                goto inval_attr_out;
 
-       err = fuse_simple_request(fm, &fa->args);
+       memset(&inarg, 0, sizeof(inarg));
+       inarg.fh = ff->fh;
+       inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
+       args.opcode = FUSE_FLUSH;
+       args.nodeid = get_node_id(inode);
+       args.in_numargs = 1;
+       args.in_args[0].size = sizeof(inarg);
+       args.in_args[0].value = &inarg;
+       args.force = true;
+
+       err = fuse_simple_request(fm, &args);
        if (err == -ENOSYS) {
                fm->fc->no_flush = 1;
                err = 0;
@@ -521,57 +532,9 @@ inval_attr_out:
         */
        if (!err && fm->fc->writeback_cache)
                fuse_invalidate_attr_mask(inode, STATX_BLOCKS);
-
-out:
-       fput(fa->file);
-       kfree(fa);
        return err;
 }
 
-static void fuse_flush_async(struct work_struct *work)
-{
-       struct fuse_flush_args *fa = container_of(work, typeof(*fa), work);
-
-       fuse_do_flush(fa);
-}
-
-static int fuse_flush(struct file *file, fl_owner_t id)
-{
-       struct fuse_flush_args *fa;
-       struct inode *inode = file_inode(file);
-       struct fuse_mount *fm = get_fuse_mount(inode);
-       struct fuse_file *ff = file->private_data;
-
-       if (fuse_is_bad(inode))
-               return -EIO;
-
-       if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
-               return 0;
-
-       fa = kzalloc(sizeof(*fa), GFP_KERNEL);
-       if (!fa)
-               return -ENOMEM;
-
-       fa->inarg.fh = ff->fh;
-       fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
-       fa->args.opcode = FUSE_FLUSH;
-       fa->args.nodeid = get_node_id(inode);
-       fa->args.in_numargs = 1;
-       fa->args.in_args[0].size = sizeof(fa->inarg);
-       fa->args.in_args[0].value = &fa->inarg;
-       fa->args.force = true;
-       fa->file = get_file(file);
-
-       /* Don't wait if the task is exiting */
-       if (current->flags & PF_EXITING) {
-               INIT_WORK(&fa->work, fuse_flush_async);
-               schedule_work(&fa->work);
-               return 0;
-       }
-
-       return fuse_do_flush(fa);
-}
-
 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
                      int datasync, int opcode)
 {
@@ -1465,7 +1428,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
        int write = flags & FUSE_DIO_WRITE;
        int cuse = flags & FUSE_DIO_CUSE;
        struct file *file = io->iocb->ki_filp;
-       struct inode *inode = file->f_mapping->host;
+       struct address_space *mapping = file->f_mapping;
+       struct inode *inode = mapping->host;
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fm->fc;
        size_t nmax = write ? fc->max_write : fc->max_read;
@@ -1477,12 +1441,20 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
        int err = 0;
        struct fuse_io_args *ia;
        unsigned int max_pages;
+       bool fopen_direct_io = ff->open_flags & FOPEN_DIRECT_IO;
 
        max_pages = iov_iter_npages(iter, fc->max_pages);
        ia = fuse_io_alloc(io, max_pages);
        if (!ia)
                return -ENOMEM;
 
+       if (fopen_direct_io && fc->direct_io_relax) {
+               res = filemap_write_and_wait_range(mapping, pos, pos + count - 1);
+               if (res) {
+                       fuse_io_free(ia);
+                       return res;
+               }
+       }
        if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
                if (!write)
                        inode_lock(inode);
@@ -1491,6 +1463,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
                        inode_unlock(inode);
        }
 
+       if (fopen_direct_io && write) {
+               res = invalidate_inode_pages2_range(mapping, idx_from, idx_to);
+               if (res) {
+                       fuse_io_free(ia);
+                       return res;
+               }
+       }
+
        io->should_dirty = !write && user_backed_iter(iter);
        while (count) {
                ssize_t nres;
@@ -2478,14 +2458,17 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct fuse_file *ff = file->private_data;
+       struct fuse_conn *fc = ff->fm->fc;
 
        /* DAX mmap is superior to direct_io mmap */
        if (FUSE_IS_DAX(file_inode(file)))
                return fuse_dax_mmap(file, vma);
 
        if (ff->open_flags & FOPEN_DIRECT_IO) {
-               /* Can't provide the coherency needed for MAP_SHARED */
-               if (vma->vm_flags & VM_MAYSHARE)
+               /* Can't provide the coherency needed for MAP_SHARED
+                * if FUSE_DIRECT_IO_RELAX isn't set.
+                */
+               if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax)
                        return -ENODEV;
 
                invalidate_inode_pages2(file->f_mapping);
index 9b7fc7d..bf0b85d 100644 (file)
@@ -88,6 +88,9 @@ struct fuse_inode {
            preserve the original mode */
        umode_t orig_i_mode;
 
+       /* Cache birthtime */
+       struct timespec64 i_btime;
+
        /** 64 bit inode number */
        u64 orig_ino;
 
@@ -167,6 +170,8 @@ enum {
        FUSE_I_SIZE_UNSTABLE,
        /* Bad inode */
        FUSE_I_BAD,
+       /* Has btime */
+       FUSE_I_BTIME,
 };
 
 struct fuse_conn;
@@ -792,6 +797,12 @@ struct fuse_conn {
        /* Is tmpfile not implemented by fs? */
        unsigned int no_tmpfile:1;
 
+       /* relax restrictions in FOPEN_DIRECT_IO mode */
+       unsigned int direct_io_relax:1;
+
+       /* Is statx not implemented by fs? */
+       unsigned int no_statx:1;
+
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
 
@@ -1058,9 +1069,11 @@ void fuse_init_symlink(struct inode *inode);
  * Change attributes of an inode
  */
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+                           struct fuse_statx *sx,
                            u64 attr_valid, u64 attr_version);
 
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+                                  struct fuse_statx *sx,
                                   u64 attr_valid, u32 cache_mask);
 
 u32 fuse_get_cache_mask(struct inode *inode);
@@ -1111,7 +1124,10 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
 
 void fuse_invalidate_atime(struct inode *inode);
 
-u64 entry_attr_timeout(struct fuse_entry_out *o);
+u64 fuse_time_to_jiffies(u64 sec, u32 nsec);
+#define ATTR_TIMEOUT(o) \
+       fuse_time_to_jiffies((o)->attr_valid, (o)->attr_valid_nsec)
+
 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
 
 /**
index 549358f..2e4eb7c 100644 (file)
@@ -77,7 +77,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
                return NULL;
 
        fi->i_time = 0;
-       fi->inval_mask = 0;
+       fi->inval_mask = ~0;
        fi->nodeid = 0;
        fi->nlookup = 0;
        fi->attr_version = 0;
@@ -163,6 +163,7 @@ static ino_t fuse_squash_ino(u64 ino64)
 }
 
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+                                  struct fuse_statx *sx,
                                   u64 attr_valid, u32 cache_mask)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -172,7 +173,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 
        fi->attr_version = atomic64_inc_return(&fc->attr_version);
        fi->i_time = attr_valid;
-       WRITE_ONCE(fi->inval_mask, 0);
+       /* Clear basic stats from invalid mask */
+       set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
 
        inode->i_ino     = fuse_squash_ino(attr->ino);
        inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
@@ -196,6 +198,25 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
        if (!(cache_mask & STATX_CTIME)) {
                inode_set_ctime(inode, attr->ctime, attr->ctimensec);
        }
+       if (sx) {
+               /* Sanitize nsecs */
+               sx->btime.tv_nsec =
+                       min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
+
+               /*
+                * Btime has been queried, cache is valid (whether or not btime
+                * is available or not) so clear STATX_BTIME from inval_mask.
+                *
+                * Availability of the btime attribute is indicated in
+                * FUSE_I_BTIME
+                */
+               set_mask_bits(&fi->inval_mask, STATX_BTIME, 0);
+               if (sx->mask & STATX_BTIME) {
+                       set_bit(FUSE_I_BTIME, &fi->state);
+                       fi->i_btime.tv_sec = sx->btime.tv_sec;
+                       fi->i_btime.tv_nsec = sx->btime.tv_nsec;
+               }
+       }
 
        if (attr->blksize != 0)
                inode->i_blkbits = ilog2(attr->blksize);
@@ -235,6 +256,7 @@ u32 fuse_get_cache_mask(struct inode *inode)
 }
 
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+                           struct fuse_statx *sx,
                            u64 attr_valid, u64 attr_version)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -269,7 +291,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
        }
 
        old_mtime = inode->i_mtime;
-       fuse_change_attributes_common(inode, attr, attr_valid, cache_mask);
+       fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
 
        oldsize = inode->i_size;
        /*
@@ -406,7 +428,7 @@ done:
        spin_lock(&fi->lock);
        fi->nlookup++;
        spin_unlock(&fi->lock);
-       fuse_change_attributes(inode, attr, attr_valid, attr_version);
+       fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
 
        return inode;
 }
@@ -1210,6 +1232,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
                                fc->init_security = 1;
                        if (flags & FUSE_CREATE_SUPP_GROUP)
                                fc->create_supp_group = 1;
+                       if (flags & FUSE_DIRECT_IO_RELAX)
+                               fc->direct_io_relax = 1;
                } else {
                        ra_pages = fc->max_read / PAGE_SIZE;
                        fc->no_lock = 1;
@@ -1256,7 +1280,7 @@ void fuse_send_init(struct fuse_mount *fm)
                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
                FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
                FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
-               FUSE_HAS_EXPIRE_ONLY;
+               FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX;
 #ifdef CONFIG_FUSE_DAX
        if (fm->fc->dax)
                flags |= FUSE_MAP_ALIGNMENT;
index dc60347..9e6d587 100644 (file)
@@ -223,8 +223,8 @@ retry:
                spin_unlock(&fi->lock);
 
                forget_all_cached_acls(inode);
-               fuse_change_attributes(inode, &o->attr,
-                                      entry_attr_timeout(o),
+               fuse_change_attributes(inode, &o->attr, NULL,
+                                      ATTR_TIMEOUT(o),
                                       attr_version);
                /*
                 * The other branch comes via fuse_iget()
@@ -232,7 +232,7 @@ retry:
                 */
        } else {
                inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
-                                 &o->attr, entry_attr_timeout(o),
+                                 &o->attr, ATTR_TIMEOUT(o),
                                  attr_version);
                if (!inode)
                        inode = ERR_PTR(-ENOMEM);
@@ -243,8 +243,16 @@ retry:
                        dput(dentry);
                        dentry = alias;
                }
-               if (IS_ERR(dentry))
+               if (IS_ERR(dentry)) {
+                       if (!IS_ERR(inode)) {
+                               struct fuse_inode *fi = get_fuse_inode(inode);
+
+                               spin_lock(&fi->lock);
+                               fi->nlookup--;
+                               spin_unlock(&fi->lock);
+                       }
                        return PTR_ERR(dentry);
+               }
        }
        if (fc->readdirplus_auto)
                set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
index 9c4b26a..c26d483 100644 (file)
@@ -183,13 +183,13 @@ static int gfs2_writepages(struct address_space *mapping,
        int ret;
 
        /*
-        * Even if we didn't write any pages here, we might still be holding
+        * Even if we didn't write enough pages here, we might still be holding
         * dirty pages in the ail. We forcibly flush the ail because we don't
         * want balance_dirty_pages() to loop indefinitely trying to write out
         * pages held in the ail that it can't find.
         */
        ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
-       if (ret == 0)
+       if (ret == 0 && wbc->nr_to_write > 0)
                set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
        return ret;
 }
@@ -272,8 +272,7 @@ continue_unlock:
                                 * not be suitable for data integrity
                                 * writeout).
                                 */
-                               *done_index = folio->index +
-                                       folio_nr_pages(folio);
+                               *done_index = folio_next_index(folio);
                                ret = 1;
                                break;
                        }
index f62366b..ef7017f 100644 (file)
@@ -161,7 +161,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip)
        int error;
 
        down_write(&ip->i_rw_mutex);
-       page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
+       page = grab_cache_page(inode->i_mapping, 0);
        error = -ENOMEM;
        if (!page)
                goto out;
index 1438e74..9cbf8d9 100644 (file)
@@ -176,7 +176,7 @@ void gfs2_glock_free(struct gfs2_glock *gl)
        wake_up_glock(gl);
        call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
        if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-               wake_up(&sdp->sd_glock_wait);
+               wake_up(&sdp->sd_kill_wait);
 }
 
 /**
@@ -468,10 +468,10 @@ done:
  * do_promote - promote as many requests as possible on the current queue
  * @gl: The glock
  * 
- * Returns: 1 if there is a blocked holder at the head of the list
+ * Returns true on success (i.e., progress was made or there are no waiters).
  */
 
-static int do_promote(struct gfs2_glock *gl)
+static bool do_promote(struct gfs2_glock *gl)
 {
        struct gfs2_holder *gh, *current_gh;
 
@@ -484,10 +484,10 @@ static int do_promote(struct gfs2_glock *gl)
                         * If we get here, it means we may not grant this
                         * holder for some reason. If this holder is at the
                         * head of the list, it means we have a blocked holder
-                        * at the head, so return 1.
+                        * at the head, so return false.
                         */
                        if (list_is_first(&gh->gh_list, &gl->gl_holders))
-                               return 1;
+                               return false;
                        do_error(gl, 0);
                        break;
                }
@@ -497,7 +497,7 @@ static int do_promote(struct gfs2_glock *gl)
                if (!current_gh)
                        current_gh = gh;
        }
-       return 0;
+       return true;
 }
 
 /**
@@ -591,10 +591,11 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
                if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
                        /* move to back of queue and try next entry */
                        if (ret & LM_OUT_CANCELED) {
-                               if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
-                                       list_move_tail(&gh->gh_list, &gl->gl_holders);
+                               list_move_tail(&gh->gh_list, &gl->gl_holders);
                                gh = find_first_waiter(gl);
                                gl->gl_target = gh->gh_state;
+                               if (do_promote(gl))
+                                       goto out;
                                goto retry;
                        }
                        /* Some error or failed "try lock" - report it */
@@ -679,8 +680,7 @@ __acquires(&gl->gl_lockref.lock)
            gh && !(gh->gh_flags & LM_FLAG_NOEXP))
                goto skip_inval;
 
-       lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
-                     LM_FLAG_PRIORITY);
+       lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP);
        GLOCK_BUG_ON(gl, gl->gl_state == target);
        GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
        if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
@@ -834,7 +834,7 @@ __acquires(&gl->gl_lockref.lock)
        } else {
                if (test_bit(GLF_DEMOTE, &gl->gl_flags))
                        gfs2_demote_wake(gl);
-               if (do_promote(gl) == 0)
+               if (do_promote(gl))
                        goto out_unlock;
                gh = find_first_waiter(gl);
                gl->gl_target = gh->gh_state;
@@ -1022,7 +1022,7 @@ static void delete_work_func(struct work_struct *work)
                 * step entirely.
                 */
                if (gfs2_try_evict(gl)) {
-                       if (test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+                       if (test_bit(SDF_KILL, &sdp->sd_flags))
                                goto out;
                        if (gfs2_queue_verify_evict(gl))
                                return;
@@ -1035,7 +1035,7 @@ static void delete_work_func(struct work_struct *work)
                                            GFS2_BLKST_UNLINKED);
                if (IS_ERR(inode)) {
                        if (PTR_ERR(inode) == -EAGAIN &&
-                           !test_bit(SDF_DEACTIVATING, &sdp->sd_flags) &&
+                           !test_bit(SDF_KILL, &sdp->sd_flags) &&
                            gfs2_queue_verify_evict(gl))
                                return;
                } else {
@@ -1231,7 +1231,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 out_free:
        gfs2_glock_dealloc(&gl->gl_rcu);
        if (atomic_dec_and_test(&sdp->sd_glock_disposal))
-               wake_up(&sdp->sd_glock_wait);
+               wake_up(&sdp->sd_kill_wait);
 
 out:
        return ret;
@@ -1515,27 +1515,20 @@ fail:
                }
                if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
                        continue;
-               if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
-                       insert_pt = &gh2->gh_list;
        }
        trace_gfs2_glock_queue(gh, 1);
        gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
        gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
        if (likely(insert_pt == NULL)) {
                list_add_tail(&gh->gh_list, &gl->gl_holders);
-               if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
-                       goto do_cancel;
                return;
        }
        list_add_tail(&gh->gh_list, insert_pt);
-do_cancel:
        gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
-       if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
-               spin_unlock(&gl->gl_lockref.lock);
-               if (sdp->sd_lockstruct.ls_ops->lm_cancel)
-                       sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
-               spin_lock(&gl->gl_lockref.lock);
-       }
+       spin_unlock(&gl->gl_lockref.lock);
+       if (sdp->sd_lockstruct.ls_ops->lm_cancel)
+               sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
+       spin_lock(&gl->gl_lockref.lock);
        return;
 
 trap_recursive:
@@ -2195,7 +2188,7 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
        flush_workqueue(glock_workqueue);
        glock_hash_walk(clear_glock, sdp);
        flush_workqueue(glock_workqueue);
-       wait_event_timeout(sdp->sd_glock_wait,
+       wait_event_timeout(sdp->sd_kill_wait,
                           atomic_read(&sdp->sd_glock_disposal) == 0,
                           HZ * 600);
        glock_hash_walk(dump_glock_func, sdp);
@@ -2227,8 +2220,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
                *p++ = 'e';
        if (flags & LM_FLAG_ANY)
                *p++ = 'A';
-       if (flags & LM_FLAG_PRIORITY)
-               *p++ = 'p';
        if (flags & LM_FLAG_NODE_SCOPE)
                *p++ = 'n';
        if (flags & GL_ASYNC)
index 1f1ba92..c8685ca 100644 (file)
@@ -68,14 +68,6 @@ enum {
  * also be granted in SHARED.  The preferred state is whichever is compatible
  * with other granted locks, or the specified state if no other locks exist.
  *
- * LM_FLAG_PRIORITY
- * Override fairness considerations.  Suppose a lock is held in a shared state
- * and there is a pending request for the deferred state.  A shared lock
- * request with the priority flag would be allowed to bypass the deferred
- * request and directly join the other shared lock.  A shared lock request
- * without the priority flag might be forced to wait until the deferred
- * requested had acquired and released the lock.
- *
  * LM_FLAG_NODE_SCOPE
  * This holder agrees to share the lock within this node. In other words,
  * the glock is held in EX mode according to DLM, but local holders on the
@@ -86,7 +78,6 @@ enum {
 #define LM_FLAG_TRY_1CB                0x0002
 #define LM_FLAG_NOEXP          0x0004
 #define LM_FLAG_ANY            0x0008
-#define LM_FLAG_PRIORITY       0x0010
 #define LM_FLAG_NODE_SCOPE     0x0020
 #define GL_ASYNC               0x0040
 #define GL_EXACT               0x0080
index aecdac3..d26759a 100644 (file)
@@ -637,7 +637,7 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 
        if (!remote || sb_rdonly(sdp->sd_vfs) ||
-           test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+           test_bit(SDF_KILL, &sdp->sd_flags))
                return;
 
        if (gl->gl_demote_state == LM_ST_UNLOCKED &&
index 04f2d78..a8c95c5 100644 (file)
@@ -452,7 +452,7 @@ struct gfs2_quota_data {
        s64 qd_change_sync;
 
        unsigned int qd_slot;
-       unsigned int qd_slot_count;
+       unsigned int qd_slot_ref;
 
        struct buffer_head *qd_bh;
        struct gfs2_quota_change *qd_bh_qc;
@@ -537,6 +537,7 @@ struct gfs2_statfs_change_host {
 #define GFS2_QUOTA_OFF         0
 #define GFS2_QUOTA_ACCOUNT     1
 #define GFS2_QUOTA_ON          2
+#define GFS2_QUOTA_QUIET       3 /* on but not complaining */
 
 #define GFS2_DATA_DEFAULT      GFS2_DATA_ORDERED
 #define GFS2_DATA_WRITEBACK    1
@@ -606,7 +607,7 @@ enum {
        SDF_REMOTE_WITHDRAW     = 13, /* Performing remote recovery */
        SDF_WITHDRAW_RECOVERY   = 14, /* Wait for journal recovery when we are
                                         withdrawing */
-       SDF_DEACTIVATING        = 15,
+       SDF_KILL                = 15,
        SDF_EVICTING            = 16,
        SDF_FROZEN              = 17,
 };
@@ -716,7 +717,7 @@ struct gfs2_sbd {
        struct gfs2_glock *sd_rename_gl;
        struct gfs2_glock *sd_freeze_gl;
        struct work_struct sd_freeze_work;
-       wait_queue_head_t sd_glock_wait;
+       wait_queue_head_t sd_kill_wait;
        wait_queue_head_t sd_async_glock_wait;
        atomic_t sd_glock_disposal;
        struct completion sd_locking_init;
index a21ac41..0eac045 100644 (file)
@@ -276,10 +276,16 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
         * gfs2_lookup_simple callers expect ENOENT
         * and do not check for NULL.
         */
-       if (inode == NULL)
-               return ERR_PTR(-ENOENT);
-       else
-               return inode;
+       if (IS_ERR_OR_NULL(inode))
+               return inode ? inode : ERR_PTR(-ENOENT);
+
+       /*
+        * Must not call back into the filesystem when allocating
+        * pages in the metadata inode's address space.
+        */
+       mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+
+       return inode;
 }
 
 
index 5491129..59ab18c 100644 (file)
@@ -222,11 +222,6 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
                lkf |= DLM_LKF_NOQUEUEBAST;
        }
 
-       if (gfs_flags & LM_FLAG_PRIORITY) {
-               lkf |= DLM_LKF_NOORDER;
-               lkf |= DLM_LKF_HEADQUE;
-       }
-
        if (gfs_flags & LM_FLAG_ANY) {
                if (req == DLM_LOCK_PR)
                        lkf |= DLM_LKF_ALTCW;
index aa56879..e5271ae 100644 (file)
@@ -1227,6 +1227,21 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
        gfs2_log_unlock(sdp);
 }
 
+static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
+{
+       return atomic_read(&sdp->sd_log_pinned) +
+              atomic_read(&sdp->sd_log_blks_needed) >=
+              atomic_read(&sdp->sd_log_thresh1);
+}
+
+static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
+{
+       return sdp->sd_jdesc->jd_blocks -
+              atomic_read(&sdp->sd_log_blks_free) +
+              atomic_read(&sdp->sd_log_blks_needed) >=
+              atomic_read(&sdp->sd_log_thresh2);
+}
+
 /**
  * gfs2_log_commit - Commit a transaction to the log
  * @sdp: the filesystem
@@ -1246,9 +1261,7 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
        log_refund(sdp, tr);
 
-       if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
-           ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
-           atomic_read(&sdp->sd_log_thresh2)))
+       if (gfs2_ail_flush_reqd(sdp) || gfs2_jrnl_flush_reqd(sdp))
                wake_up(&sdp->sd_logd_waitq);
 }
 
@@ -1271,24 +1284,6 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
        gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
 }
 
-static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
-{
-       return (atomic_read(&sdp->sd_log_pinned) +
-               atomic_read(&sdp->sd_log_blks_needed) >=
-               atomic_read(&sdp->sd_log_thresh1));
-}
-
-static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
-{
-       unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
-
-       if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
-               return 1;
-
-       return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
-               atomic_read(&sdp->sd_log_thresh2);
-}
-
 /**
  * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
  * @data: Pointer to GFS2 superblock
@@ -1301,14 +1296,11 @@ int gfs2_logd(void *data)
 {
        struct gfs2_sbd *sdp = data;
        unsigned long t = 1;
-       DEFINE_WAIT(wait);
 
        while (!kthread_should_stop()) {
+               if (gfs2_withdrawn(sdp))
+                       break;
 
-               if (gfs2_withdrawn(sdp)) {
-                       msleep_interruptible(HZ);
-                       continue;
-               }
                /* Check for errors writing to the journal */
                if (sdp->sd_log_error) {
                        gfs2_lm(sdp,
@@ -1317,7 +1309,7 @@ int gfs2_logd(void *data)
                                "prevent further damage.\n",
                                sdp->sd_fsname, sdp->sd_log_error);
                        gfs2_withdraw(sdp);
-                       continue;
+                       break;
                }
 
                if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
@@ -1326,7 +1318,9 @@ int gfs2_logd(void *data)
                                                  GFS2_LFC_LOGD_JFLUSH_REQD);
                }
 
-               if (gfs2_ail_flush_reqd(sdp)) {
+               if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+                   gfs2_ail_flush_reqd(sdp)) {
+                       clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
                        gfs2_ail1_start(sdp);
                        gfs2_ail1_wait(sdp);
                        gfs2_ail1_empty(sdp, 0);
@@ -1338,17 +1332,14 @@ int gfs2_logd(void *data)
 
                try_to_freeze();
 
-               do {
-                       prepare_to_wait(&sdp->sd_logd_waitq, &wait,
-                                       TASK_INTERRUPTIBLE);
-                       if (!gfs2_ail_flush_reqd(sdp) &&
-                           !gfs2_jrnl_flush_reqd(sdp) &&
-                           !kthread_should_stop())
-                               t = schedule_timeout(t);
-               } while(t && !gfs2_ail_flush_reqd(sdp) &&
-                       !gfs2_jrnl_flush_reqd(sdp) &&
-                       !kthread_should_stop());
-               finish_wait(&sdp->sd_logd_waitq, &wait);
+               t = wait_event_interruptible_timeout(sdp->sd_logd_waitq,
+                               test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+                               gfs2_ail_flush_reqd(sdp) ||
+                               gfs2_jrnl_flush_reqd(sdp) ||
+                               sdp->sd_log_error ||
+                               gfs2_withdrawn(sdp) ||
+                               kthread_should_stop(),
+                               t);
        }
 
        return 0;
index 251322b..483f698 100644 (file)
@@ -456,7 +456,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
  * Find the folio with 'index' in the journal's mapping. Search the folio for
  * the journal head if requested (cleanup == false). Release refs on the
  * folio so the page cache can reclaim it. We grabbed a
- * reference on this folio twice, first when we did a find_or_create_page()
+ * reference on this folio twice, first when we did a grab_cache_page()
  * to obtain the folio to add it to the bio and second when we do a
  * filemap_get_folio() here to get the folio to wait on while I/O on it is being
  * completed.
@@ -481,7 +481,7 @@ static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index,
        if (!*done)
                *done = gfs2_jhead_pg_srch(jd, head, &folio->page);
 
-       /* filemap_get_folio() and the earlier find_or_create_page() */
+       /* filemap_get_folio() and the earlier grab_cache_page() */
        folio_put_refs(folio, 2);
 }
 
@@ -535,8 +535,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head,
 
                for (; block < je->lblock + je->blocks; block++, dblock++) {
                        if (!page) {
-                               page = find_or_create_page(mapping,
-                                               block >> shift, GFP_NOFS);
+                               page = grab_cache_page(mapping, block >> shift);
                                if (!page) {
                                        ret = -ENOMEM;
                                        done = true;
index afcb328..66eb98b 100644 (file)
@@ -152,9 +152,9 @@ static int __init init_gfs2_fs(void)
                goto fail_shrinker;
 
        error = -ENOMEM;
-       gfs_recovery_wq = alloc_workqueue("gfs_recovery",
+       gfs2_recovery_wq = alloc_workqueue("gfs2_recovery",
                                          WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
-       if (!gfs_recovery_wq)
+       if (!gfs2_recovery_wq)
                goto fail_wq1;
 
        gfs2_control_wq = alloc_workqueue("gfs2_control",
@@ -162,7 +162,7 @@ static int __init init_gfs2_fs(void)
        if (!gfs2_control_wq)
                goto fail_wq2;
 
-       gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
+       gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", 0, 0);
 
        if (!gfs2_freeze_wq)
                goto fail_wq3;
@@ -194,7 +194,7 @@ fail_mempool:
 fail_wq3:
        destroy_workqueue(gfs2_control_wq);
 fail_wq2:
-       destroy_workqueue(gfs_recovery_wq);
+       destroy_workqueue(gfs2_recovery_wq);
 fail_wq1:
        unregister_shrinker(&gfs2_qd_shrinker);
 fail_shrinker:
@@ -234,7 +234,7 @@ static void __exit exit_gfs2_fs(void)
        gfs2_unregister_debugfs();
        unregister_filesystem(&gfs2_fs_type);
        unregister_filesystem(&gfs2meta_fs_type);
-       destroy_workqueue(gfs_recovery_wq);
+       destroy_workqueue(gfs2_recovery_wq);
        destroy_workqueue(gfs2_control_wq);
        destroy_workqueue(gfs2_freeze_wq);
        list_lru_destroy(&gfs2_qd_lru);
index 8a27957..33ca047 100644 (file)
@@ -87,7 +87,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
        set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
        gfs2_tune_init(&sdp->sd_tune);
 
-       init_waitqueue_head(&sdp->sd_glock_wait);
+       init_waitqueue_head(&sdp->sd_kill_wait);
        init_waitqueue_head(&sdp->sd_async_glock_wait);
        atomic_set(&sdp->sd_glock_disposal, 0);
        init_completion(&sdp->sd_locking_init);
@@ -1103,29 +1103,49 @@ static int init_threads(struct gfs2_sbd *sdp)
        struct task_struct *p;
        int error = 0;
 
-       p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+       p = kthread_create(gfs2_logd, sdp, "gfs2_logd/%s", sdp->sd_fsname);
        if (IS_ERR(p)) {
                error = PTR_ERR(p);
-               fs_err(sdp, "can't start logd thread: %d\n", error);
+               fs_err(sdp, "can't create logd thread: %d\n", error);
                return error;
        }
+       get_task_struct(p);
        sdp->sd_logd_process = p;
 
-       p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+       p = kthread_create(gfs2_quotad, sdp, "gfs2_quotad/%s", sdp->sd_fsname);
        if (IS_ERR(p)) {
                error = PTR_ERR(p);
-               fs_err(sdp, "can't start quotad thread: %d\n", error);
+               fs_err(sdp, "can't create quotad thread: %d\n", error);
                goto fail;
        }
+       get_task_struct(p);
        sdp->sd_quotad_process = p;
+
+       wake_up_process(sdp->sd_logd_process);
+       wake_up_process(sdp->sd_quotad_process);
        return 0;
 
 fail:
        kthread_stop(sdp->sd_logd_process);
+       put_task_struct(sdp->sd_logd_process);
        sdp->sd_logd_process = NULL;
        return error;
 }
 
+void gfs2_destroy_threads(struct gfs2_sbd *sdp)
+{
+       if (sdp->sd_logd_process) {
+               kthread_stop(sdp->sd_logd_process);
+               put_task_struct(sdp->sd_logd_process);
+               sdp->sd_logd_process = NULL;
+       }
+       if (sdp->sd_quotad_process) {
+               kthread_stop(sdp->sd_quotad_process);
+               put_task_struct(sdp->sd_quotad_process);
+               sdp->sd_quotad_process = NULL;
+       }
+}
+
 /**
  * gfs2_fill_super - Read in superblock
  * @sb: The VFS superblock
@@ -1276,12 +1296,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
 
        if (error) {
                gfs2_freeze_unlock(&sdp->sd_freeze_gh);
-               if (sdp->sd_quotad_process)
-                       kthread_stop(sdp->sd_quotad_process);
-               sdp->sd_quotad_process = NULL;
-               if (sdp->sd_logd_process)
-                       kthread_stop(sdp->sd_logd_process);
-               sdp->sd_logd_process = NULL;
+               gfs2_destroy_threads(sdp);
                fs_err(sdp, "can't make FS RW: %d\n", error);
                goto fail_per_node;
        }
@@ -1381,6 +1396,7 @@ static const struct constant_table gfs2_param_quota[] = {
        {"off",        GFS2_QUOTA_OFF},
        {"account",    GFS2_QUOTA_ACCOUNT},
        {"on",         GFS2_QUOTA_ON},
+       {"quiet",      GFS2_QUOTA_QUIET},
        {}
 };
 
@@ -1786,9 +1802,9 @@ static void gfs2_kill_sb(struct super_block *sb)
        /*
         * Flush and then drain the delete workqueue here (via
         * destroy_workqueue()) to ensure that any delete work that
-        * may be running will also see the SDF_DEACTIVATING flag.
+        * may be running will also see the SDF_KILL flag.
         */
-       set_bit(SDF_DEACTIVATING, &sdp->sd_flags);
+       set_bit(SDF_KILL, &sdp->sd_flags);
        gfs2_flush_delete_work(sdp);
        destroy_workqueue(sdp->sd_delete_wq);
 
index aa5fd06..171b271 100644 (file)
@@ -109,38 +109,44 @@ static inline void spin_unlock_bucket(unsigned int hash)
 static void gfs2_qd_dealloc(struct rcu_head *rcu)
 {
        struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu);
+       struct gfs2_sbd *sdp = qd->qd_sbd;
+
        kmem_cache_free(gfs2_quotad_cachep, qd);
+       if (atomic_dec_and_test(&sdp->sd_quota_count))
+               wake_up(&sdp->sd_kill_wait);
 }
 
-static void gfs2_qd_dispose(struct list_head *list)
+static void gfs2_qd_dispose(struct gfs2_quota_data *qd)
 {
-       struct gfs2_quota_data *qd;
-       struct gfs2_sbd *sdp;
-
-       while (!list_empty(list)) {
-               qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
-               sdp = qd->qd_gl->gl_name.ln_sbd;
-
-               list_del(&qd->qd_lru);
+       struct gfs2_sbd *sdp = qd->qd_sbd;
 
-               /* Free from the filesystem-specific list */
-               spin_lock(&qd_lock);
-               list_del(&qd->qd_list);
-               spin_unlock(&qd_lock);
+       spin_lock(&qd_lock);
+       list_del(&qd->qd_list);
+       spin_unlock(&qd_lock);
 
-               spin_lock_bucket(qd->qd_hash);
-               hlist_bl_del_rcu(&qd->qd_hlist);
-               spin_unlock_bucket(qd->qd_hash);
+       spin_lock_bucket(qd->qd_hash);
+       hlist_bl_del_rcu(&qd->qd_hlist);
+       spin_unlock_bucket(qd->qd_hash);
 
+       if (!gfs2_withdrawn(sdp)) {
                gfs2_assert_warn(sdp, !qd->qd_change);
-               gfs2_assert_warn(sdp, !qd->qd_slot_count);
+               gfs2_assert_warn(sdp, !qd->qd_slot_ref);
                gfs2_assert_warn(sdp, !qd->qd_bh_count);
+       }
 
-               gfs2_glock_put(qd->qd_gl);
-               atomic_dec(&sdp->sd_quota_count);
+       gfs2_glock_put(qd->qd_gl);
+       call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+}
 
-               /* Delete it from the common reclaim list */
-               call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+static void gfs2_qd_list_dispose(struct list_head *list)
+{
+       struct gfs2_quota_data *qd;
+
+       while (!list_empty(list)) {
+               qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
+               list_del(&qd->qd_lru);
+
+               gfs2_qd_dispose(qd);
        }
 }
 
@@ -149,18 +155,22 @@ static enum lru_status gfs2_qd_isolate(struct list_head *item,
                struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
 {
        struct list_head *dispose = arg;
-       struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru);
+       struct gfs2_quota_data *qd =
+               list_entry(item, struct gfs2_quota_data, qd_lru);
+       enum lru_status status;
 
        if (!spin_trylock(&qd->qd_lockref.lock))
                return LRU_SKIP;
 
+       status = LRU_SKIP;
        if (qd->qd_lockref.count == 0) {
                lockref_mark_dead(&qd->qd_lockref);
                list_lru_isolate_move(lru, &qd->qd_lru, dispose);
+               status = LRU_REMOVED;
        }
 
        spin_unlock(&qd->qd_lockref.lock);
-       return LRU_REMOVED;
+       return status;
 }
 
 static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
@@ -175,7 +185,7 @@ static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
        freed = list_lru_shrink_walk(&gfs2_qd_lru, sc,
                                     gfs2_qd_isolate, &dispose);
 
-       gfs2_qd_dispose(&dispose);
+       gfs2_qd_list_dispose(&dispose);
 
        return freed;
 }
@@ -203,12 +213,7 @@ static u64 qd2index(struct gfs2_quota_data *qd)
 
 static u64 qd2offset(struct gfs2_quota_data *qd)
 {
-       u64 offset;
-
-       offset = qd2index(qd);
-       offset *= sizeof(struct gfs2_quota);
-
-       return offset;
+       return qd2index(qd) * sizeof(struct gfs2_quota);
 }
 
 static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid)
@@ -221,7 +226,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str
                return NULL;
 
        qd->qd_sbd = sdp;
-       qd->qd_lockref.count = 1;
+       qd->qd_lockref.count = 0;
        spin_lock_init(&qd->qd_lockref.lock);
        qd->qd_id = qid;
        qd->qd_slot = -1;
@@ -283,6 +288,7 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
        spin_lock_bucket(hash);
        *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
        if (qd == NULL) {
+               new_qd->qd_lockref.count++;
                *qdp = new_qd;
                list_add(&new_qd->qd_list, &sdp->sd_quota_list);
                hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]);
@@ -302,20 +308,31 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
 
 static void qd_hold(struct gfs2_quota_data *qd)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref));
        lockref_get(&qd->qd_lockref);
 }
 
 static void qd_put(struct gfs2_quota_data *qd)
 {
+       struct gfs2_sbd *sdp;
+
        if (lockref_put_or_lock(&qd->qd_lockref))
                return;
 
+       BUG_ON(__lockref_is_dead(&qd->qd_lockref));
+       sdp = qd->qd_sbd;
+       if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
+               lockref_mark_dead(&qd->qd_lockref);
+               spin_unlock(&qd->qd_lockref.lock);
+
+               gfs2_qd_dispose(qd);
+               return;
+       }
+
        qd->qd_lockref.count = 0;
        list_lru_add(&gfs2_qd_lru, &qd->qd_lru);
        spin_unlock(&qd->qd_lockref.lock);
-
 }
 
 static int slot_get(struct gfs2_quota_data *qd)
@@ -325,20 +342,19 @@ static int slot_get(struct gfs2_quota_data *qd)
        int error = 0;
 
        spin_lock(&sdp->sd_bitmap_lock);
-       if (qd->qd_slot_count != 0)
-               goto out;
-
-       error = -ENOSPC;
-       bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots);
-       if (bit < sdp->sd_quota_slots) {
+       if (qd->qd_slot_ref == 0) {
+               bit = find_first_zero_bit(sdp->sd_quota_bitmap,
+                                         sdp->sd_quota_slots);
+               if (bit >= sdp->sd_quota_slots) {
+                       error = -ENOSPC;
+                       goto out;
+               }
                set_bit(bit, sdp->sd_quota_bitmap);
                qd->qd_slot = bit;
-               error = 0;
-out:
-               qd->qd_slot_count++;
        }
+       qd->qd_slot_ref++;
+out:
        spin_unlock(&sdp->sd_bitmap_lock);
-
        return error;
 }
 
@@ -347,8 +363,8 @@ static void slot_hold(struct gfs2_quota_data *qd)
        struct gfs2_sbd *sdp = qd->qd_sbd;
 
        spin_lock(&sdp->sd_bitmap_lock);
-       gfs2_assert(sdp, qd->qd_slot_count);
-       qd->qd_slot_count++;
+       gfs2_assert(sdp, qd->qd_slot_ref);
+       qd->qd_slot_ref++;
        spin_unlock(&sdp->sd_bitmap_lock);
 }
 
@@ -357,8 +373,8 @@ static void slot_put(struct gfs2_quota_data *qd)
        struct gfs2_sbd *sdp = qd->qd_sbd;
 
        spin_lock(&sdp->sd_bitmap_lock);
-       gfs2_assert(sdp, qd->qd_slot_count);
-       if (!--qd->qd_slot_count) {
+       gfs2_assert(sdp, qd->qd_slot_ref);
+       if (!--qd->qd_slot_ref) {
                BUG_ON(!test_and_clear_bit(qd->qd_slot, sdp->sd_quota_bitmap));
                qd->qd_slot = -1;
        }
@@ -367,7 +383,7 @@ static void slot_put(struct gfs2_quota_data *qd)
 
 static int bh_get(struct gfs2_quota_data *qd)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        struct inode *inode = sdp->sd_qc_inode;
        struct gfs2_inode *ip = GFS2_I(inode);
        unsigned int block, offset;
@@ -421,7 +437,7 @@ fail:
 
 static void bh_put(struct gfs2_quota_data *qd)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
 
        mutex_lock(&sdp->sd_quota_mutex);
        gfs2_assert(sdp, qd->qd_bh_count);
@@ -451,6 +467,20 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd,
        return 1;
 }
 
+static int qd_bh_get_or_undo(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
+{
+       int error;
+
+       error = bh_get(qd);
+       if (!error)
+               return 0;
+
+       clear_bit(QDF_LOCKED, &qd->qd_flags);
+       slot_put(qd);
+       qd_put(qd);
+       return error;
+}
+
 static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
 {
        struct gfs2_quota_data *qd = NULL, *iter;
@@ -473,30 +503,29 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
        spin_unlock(&qd_lock);
 
        if (qd) {
-               error = bh_get(qd);
-               if (error) {
-                       clear_bit(QDF_LOCKED, &qd->qd_flags);
-                       slot_put(qd);
-                       qd_put(qd);
+               error = qd_bh_get_or_undo(sdp, qd);
+               if (error)
                        return error;
-               }
+               *qdp = qd;
        }
 
-       *qdp = qd;
-
        return 0;
 }
 
-static void qd_unlock(struct gfs2_quota_data *qd)
+static void qdsb_put(struct gfs2_quota_data *qd)
 {
-       gfs2_assert_warn(qd->qd_gl->gl_name.ln_sbd,
-                        test_bit(QDF_LOCKED, &qd->qd_flags));
-       clear_bit(QDF_LOCKED, &qd->qd_flags);
        bh_put(qd);
        slot_put(qd);
        qd_put(qd);
 }
 
+static void qd_unlock(struct gfs2_quota_data *qd)
+{
+       gfs2_assert_warn(qd->qd_sbd, test_bit(QDF_LOCKED, &qd->qd_flags));
+       clear_bit(QDF_LOCKED, &qd->qd_flags);
+       qdsb_put(qd);
+}
+
 static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid,
                    struct gfs2_quota_data **qdp)
 {
@@ -523,13 +552,6 @@ fail:
        return error;
 }
 
-static void qdsb_put(struct gfs2_quota_data *qd)
-{
-       bh_put(qd);
-       slot_put(qd);
-       qd_put(qd);
-}
-
 /**
  * gfs2_qa_get - make sure we have a quota allocations data structure,
  *               if necessary
@@ -666,7 +688,7 @@ static int sort_qd(const void *a, const void *b)
 
 static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
        struct gfs2_quota_change *qc = qd->qd_bh_qc;
        s64 x;
@@ -708,30 +730,29 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type)
        mutex_unlock(&sdp->sd_quota_mutex);
 }
 
-static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
+static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index,
                                  unsigned off, void *buf, unsigned bytes)
 {
+       struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
        struct inode *inode = &ip->i_inode;
-       struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
        struct buffer_head *bh;
        u64 blk;
        unsigned bsize = sdp->sd_sb.sb_bsize, bnum = 0, boff = 0;
        unsigned to_write = bytes, pg_off = off;
-       int done = 0;
 
        blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift);
        boff = off % bsize;
 
-       page = find_or_create_page(mapping, index, GFP_NOFS);
+       page = grab_cache_page(mapping, index);
        if (!page)
                return -ENOMEM;
        if (!page_has_buffers(page))
                create_empty_buffers(page, bsize, 0);
 
        bh = page_buffers(page);
-       while (!done) {
+       for(;;) {
                /* Find the beginning block within the page */
                if (pg_off >= ((bnum * bsize) + bsize)) {
                        bh = bh->b_this_page;
@@ -751,10 +772,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
                        set_buffer_uptodate(bh);
                if (bh_read(bh, REQ_META | REQ_PRIO) < 0)
                        goto unlock_out;
-               if (gfs2_is_jdata(ip))
-                       gfs2_trans_add_data(ip->i_gl, bh);
-               else
-                       gfs2_ordered_add_inode(ip);
+               gfs2_trans_add_data(ip->i_gl, bh);
 
                /* If we need to write to the next block as well */
                if (to_write > (bsize - boff)) {
@@ -763,7 +781,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
                        boff = pg_off % bsize;
                        continue;
                }
-               done = 1;
+               break;
        }
 
        /* Write to the page, now that we have setup the buffer(s) */
@@ -780,12 +798,12 @@ unlock_out:
        return -EIO;
 }
 
-static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
+static int gfs2_write_disk_quota(struct gfs2_sbd *sdp, struct gfs2_quota *qp,
                                 loff_t loc)
 {
        unsigned long pg_beg;
        unsigned pg_off, nbytes, overflow = 0;
-       int pg_oflow = 0, error;
+       int error;
        void *ptr;
 
        nbytes = sizeof(struct gfs2_quota);
@@ -794,17 +812,15 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
        pg_off = offset_in_page(loc);
 
        /* If the quota straddles a page boundary, split the write in two */
-       if ((pg_off + nbytes) > PAGE_SIZE) {
-               pg_oflow = 1;
+       if ((pg_off + nbytes) > PAGE_SIZE)
                overflow = (pg_off + nbytes) - PAGE_SIZE;
-       }
 
        ptr = qp;
-       error = gfs2_write_buf_to_page(ip, pg_beg, pg_off, ptr,
+       error = gfs2_write_buf_to_page(sdp, pg_beg, pg_off, ptr,
                                       nbytes - overflow);
        /* If there's an overflow, write the remaining bytes to the next page */
-       if (!error && pg_oflow)
-               error = gfs2_write_buf_to_page(ip, pg_beg + 1, 0,
+       if (!error && overflow)
+               error = gfs2_write_buf_to_page(sdp, pg_beg + 1, 0,
                                               ptr + nbytes - overflow,
                                               overflow);
        return error;
@@ -812,7 +828,7 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
 
 /**
  * gfs2_adjust_quota - adjust record of current block usage
- * @ip: The quota inode
+ * @sdp: The superblock
  * @loc: Offset of the entry in the quota file
  * @change: The amount of usage change to record
  * @qd: The quota data
@@ -824,12 +840,12 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
  * Returns: 0 or -ve on error
  */
 
-static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
+static int gfs2_adjust_quota(struct gfs2_sbd *sdp, loff_t loc,
                             s64 change, struct gfs2_quota_data *qd,
                             struct qc_dqblk *fdq)
 {
+       struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
        struct inode *inode = &ip->i_inode;
-       struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_quota q;
        int err;
        u64 size;
@@ -846,7 +862,6 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                return err;
 
        loc -= sizeof(q); /* gfs2_internal_read would've advanced the loc ptr */
-       err = -EIO;
        be64_add_cpu(&q.qu_value, change);
        if (((s64)be64_to_cpu(q.qu_value)) < 0)
                q.qu_value = 0; /* Never go negative on quota usage */
@@ -866,7 +881,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                }
        }
 
-       err = gfs2_write_disk_quota(ip, &q, loc);
+       err = gfs2_write_disk_quota(sdp, &q, loc);
        if (!err) {
                size = loc + sizeof(struct gfs2_quota);
                if (size > inode->i_size)
@@ -881,7 +896,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 
 static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 {
-       struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = (*qda)->qd_sbd;
        struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
        struct gfs2_alloc_parms ap = { .aflags = 0, };
        unsigned int data_blocks, ind_blocks;
@@ -893,18 +908,12 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
        unsigned int nalloc = 0, blocks;
        int error;
 
-       error = gfs2_qa_get(ip);
-       if (error)
-               return error;
-
        gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
                              &data_blocks, &ind_blocks);
 
        ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
-       if (!ghs) {
-               error = -ENOMEM;
-               goto out;
-       }
+       if (!ghs)
+               return -ENOMEM;
 
        sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
        inode_lock(&ip->i_inode);
@@ -953,7 +962,8 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
        for (x = 0; x < num_qd; x++) {
                qd = qda[x];
                offset = qd2offset(qd);
-               error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL);
+               error = gfs2_adjust_quota(sdp, offset, qd->qd_change_sync, qd,
+                                                       NULL);
                if (error)
                        goto out_end_trans;
 
@@ -961,8 +971,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                set_bit(QDF_REFRESH, &qd->qd_flags);
        }
 
-       error = 0;
-
 out_end_trans:
        gfs2_trans_end(sdp);
 out_ipres:
@@ -976,8 +984,10 @@ out_dq:
        kfree(ghs);
        gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl,
                       GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC);
-out:
-       gfs2_qa_put(ip);
+       if (!error) {
+               for (x = 0; x < num_qd; x++)
+                       qda[x]->qd_sync_gen = sdp->sd_quota_sync_gen;
+       }
        return error;
 }
 
@@ -1009,11 +1019,12 @@ static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
 static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
                    struct gfs2_holder *q_gh)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
        struct gfs2_holder i_gh;
        int error;
 
+       gfs2_assert_warn(sdp, sdp == qd->qd_gl->gl_name.ln_sbd);
 restart:
        error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
        if (error)
@@ -1059,9 +1070,10 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_quota_data *qd;
        u32 x;
-       int error = 0;
+       int error;
 
-       if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+       if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON &&
+           sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET)
                return 0;
 
        error = gfs2_quota_hold(ip, uid, gid);
@@ -1089,16 +1101,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
        return error;
 }
 
-static int need_sync(struct gfs2_quota_data *qd)
+static bool need_sync(struct gfs2_quota_data *qd)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
        struct gfs2_tune *gt = &sdp->sd_tune;
        s64 value;
        unsigned int num, den;
-       int do_sync = 1;
 
        if (!qd->qd_qb.qb_limit)
-               return 0;
+               return false;
 
        spin_lock(&qd_lock);
        value = qd->qd_change;
@@ -1109,26 +1120,26 @@ static int need_sync(struct gfs2_quota_data *qd)
        den = gt->gt_quota_scale_den;
        spin_unlock(&gt->gt_spin);
 
-       if (value < 0)
-               do_sync = 0;
+       if (value <= 0)
+               return false;
        else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >=
                 (s64)be64_to_cpu(qd->qd_qb.qb_limit))
-               do_sync = 0;
+               return false;
        else {
                value *= gfs2_jindex_size(sdp) * num;
                value = div_s64(value, den);
                value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
                if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
-                       do_sync = 0;
+                       return false;
        }
 
-       return do_sync;
+       return true;
 }
 
 void gfs2_quota_unlock(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       struct gfs2_quota_data *qda[4];
+       struct gfs2_quota_data *qda[2 * GFS2_MAXQUOTAS];
        unsigned int count = 0;
        u32 x;
        int found;
@@ -1138,7 +1149,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
 
        for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
                struct gfs2_quota_data *qd;
-               int sync;
+               bool sync;
 
                qd = ip->i_qadata->qa_qd[x];
                sync = need_sync(qd);
@@ -1154,15 +1165,8 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
                if (!found)
                        continue;
 
-               gfs2_assert_warn(sdp, qd->qd_change_sync);
-               if (bh_get(qd)) {
-                       clear_bit(QDF_LOCKED, &qd->qd_flags);
-                       slot_put(qd);
-                       qd_put(qd);
-                       continue;
-               }
-
-               qda[count++] = qd;
+               if (!qd_bh_get_or_undo(sdp, qd))
+                       qda[count++] = qd;
        }
 
        if (count) {
@@ -1178,12 +1182,13 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
 
 static int print_message(struct gfs2_quota_data *qd, char *type)
 {
-       struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+       struct gfs2_sbd *sdp = qd->qd_sbd;
 
-       fs_info(sdp, "quota %s for %s %u\n",
-               type,
-               (qd->qd_id.type == USRQUOTA) ? "user" : "group",
-               from_kqid(&init_user_ns, qd->qd_id));
+       if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET)
+               fs_info(sdp, "quota %s for %s %u\n",
+                       type,
+                       (qd->qd_id.type == USRQUOTA) ? "user" : "group",
+                       from_kqid(&init_user_ns, qd->qd_id));
 
        return 0;
 }
@@ -1269,7 +1274,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        u32 x;
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 
-       if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON ||
+       if ((sdp->sd_args.ar_quota != GFS2_QUOTA_ON &&
+           sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) ||
            gfs2_assert_warn(sdp, change))
                return;
        if (ip->i_diskflags & GFS2_DIF_SYSTEM)
@@ -1288,6 +1294,24 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
        }
 }
 
+static bool qd_changed(struct gfs2_sbd *sdp)
+{
+       struct gfs2_quota_data *qd;
+       bool changed = false;
+
+       spin_lock(&qd_lock);
+       list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+               if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
+                   !test_bit(QDF_CHANGE, &qd->qd_flags))
+                       continue;
+
+               changed = true;
+               break;
+       }
+       spin_unlock(&qd_lock);
+       return changed;
+}
+
 int gfs2_quota_sync(struct super_block *sb, int type)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
@@ -1297,6 +1321,9 @@ int gfs2_quota_sync(struct super_block *sb, int type)
        unsigned int x;
        int error = 0;
 
+       if (!qd_changed(sdp))
+               return 0;
+
        qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
        if (!qda)
                return -ENOMEM;
@@ -1318,10 +1345,6 @@ int gfs2_quota_sync(struct super_block *sb, int type)
                if (num_qd) {
                        if (!error)
                                error = do_sync(num_qd, qda);
-                       if (!error)
-                               for (x = 0; x < num_qd; x++)
-                                       qda[x]->qd_sync_gen =
-                                               sdp->sd_quota_sync_gen;
 
                        for (x = 0; x < num_qd; x++)
                                qd_unlock(qda[x]);
@@ -1423,7 +1446,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
                        set_bit(QDF_CHANGE, &qd->qd_flags);
                        qd->qd_change = qc_change;
                        qd->qd_slot = slot;
-                       qd->qd_slot_count = 1;
+                       qd->qd_slot_ref = 1;
 
                        spin_lock(&qd_lock);
                        BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap));
@@ -1455,36 +1478,35 @@ fail:
 
 void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
 {
-       struct list_head *head = &sdp->sd_quota_list;
        struct gfs2_quota_data *qd;
+       LIST_HEAD(dispose);
+       int count;
 
-       spin_lock(&qd_lock);
-       while (!list_empty(head)) {
-               qd = list_last_entry(head, struct gfs2_quota_data, qd_list);
+       BUG_ON(test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
 
-               list_del(&qd->qd_list);
+       spin_lock(&qd_lock);
+       list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+               spin_lock(&qd->qd_lockref.lock);
+               if (qd->qd_lockref.count != 0) {
+                       spin_unlock(&qd->qd_lockref.lock);
+                       continue;
+               }
+               lockref_mark_dead(&qd->qd_lockref);
+               spin_unlock(&qd->qd_lockref.lock);
 
-               /* Also remove if this qd exists in the reclaim list */
                list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
-               atomic_dec(&sdp->sd_quota_count);
-               spin_unlock(&qd_lock);
-
-               spin_lock_bucket(qd->qd_hash);
-               hlist_bl_del_rcu(&qd->qd_hlist);
-               spin_unlock_bucket(qd->qd_hash);
-
-               gfs2_assert_warn(sdp, !qd->qd_change);
-               gfs2_assert_warn(sdp, !qd->qd_slot_count);
-               gfs2_assert_warn(sdp, !qd->qd_bh_count);
-
-               gfs2_glock_put(qd->qd_gl);
-               call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
-
-               spin_lock(&qd_lock);
+               list_add(&qd->qd_lru, &dispose);
        }
        spin_unlock(&qd_lock);
 
-       gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
+       gfs2_qd_list_dispose(&dispose);
+
+       wait_event_timeout(sdp->sd_kill_wait,
+               (count = atomic_read(&sdp->sd_quota_count)) == 0,
+               HZ * 60);
+
+       if (count != 0)
+               fs_err(sdp, "%d left-over quota data objects\n", count);
 
        kvfree(sdp->sd_quota_bitmap);
        sdp->sd_quota_bitmap = NULL;
@@ -1536,12 +1558,11 @@ int gfs2_quotad(void *data)
        unsigned long statfs_timeo = 0;
        unsigned long quotad_timeo = 0;
        unsigned long t = 0;
-       DEFINE_WAIT(wait);
 
        while (!kthread_should_stop()) {
-
                if (gfs2_withdrawn(sdp))
-                       goto bypass;
+                       break;
+
                /* Update the master statfs file */
                if (sdp->sd_statfs_force_sync) {
                        int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
@@ -1559,15 +1580,16 @@ int gfs2_quotad(void *data)
 
                try_to_freeze();
 
-bypass:
                t = min(quotad_timeo, statfs_timeo);
 
-               prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
-               if (!sdp->sd_statfs_force_sync)
-                       t -= schedule_timeout(t);
-               else
+               t = wait_event_interruptible_timeout(sdp->sd_quota_wait,
+                               sdp->sd_statfs_force_sync ||
+                               gfs2_withdrawn(sdp) ||
+                               kthread_should_stop(),
+                               t);
+
+               if (sdp->sd_statfs_force_sync)
                        t = 0;
-               finish_wait(&sdp->sd_quota_wait, &wait);
        }
 
        return 0;
@@ -1580,6 +1602,8 @@ static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state)
        memset(state, 0, sizeof(*state));
 
        switch (sdp->sd_args.ar_quota) {
+       case GFS2_QUOTA_QUIET:
+               fallthrough;
        case GFS2_QUOTA_ON:
                state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
                state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
@@ -1726,7 +1750,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
                goto out_release;
 
        /* Apply changes */
-       error = gfs2_adjust_quota(ip, offset, 0, qd, fdq);
+       error = gfs2_adjust_quota(sdp, offset, 0, qd, fdq);
        if (!error)
                clear_bit(QDF_QMSG_QUIET, &qd->qd_flags);
 
index 9c7a9f6..5aae026 100644 (file)
@@ -27,7 +27,7 @@
 #include "util.h"
 #include "dir.h"
 
-struct workqueue_struct *gfs_recovery_wq;
+struct workqueue_struct *gfs2_recovery_wq;
 
 int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
                           struct buffer_head **bh)
@@ -570,7 +570,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
                return -EBUSY;
 
        /* we have JDF_RECOVERY, queue should always succeed */
-       rv = queue_work(gfs_recovery_wq, &jd->jd_work);
+       rv = queue_work(gfs2_recovery_wq, &jd->jd_work);
        BUG_ON(!rv);
 
        if (wait)
index 0d30f8e..7a0c9d0 100644 (file)
@@ -9,7 +9,7 @@
 
 #include "incore.h"
 
-extern struct workqueue_struct *gfs_recovery_wq;
+extern struct workqueue_struct *gfs2_recovery_wq;
 
 static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk)
 {
index 2f70133..02d93da 100644 (file)
@@ -546,20 +546,10 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
        int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 
-       if (!test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+       if (!test_bit(SDF_KILL, &sdp->sd_flags))
                gfs2_flush_delete_work(sdp);
 
-       if (!log_write_allowed && current == sdp->sd_quotad_process)
-               fs_warn(sdp, "The quotad daemon is withdrawing.\n");
-       else if (sdp->sd_quotad_process)
-               kthread_stop(sdp->sd_quotad_process);
-       sdp->sd_quotad_process = NULL;
-
-       if (!log_write_allowed && current == sdp->sd_logd_process)
-               fs_warn(sdp, "The logd daemon is withdrawing.\n");
-       else if (sdp->sd_logd_process)
-               kthread_stop(sdp->sd_logd_process);
-       sdp->sd_logd_process = NULL;
+       gfs2_destroy_threads(sdp);
 
        if (log_write_allowed) {
                gfs2_quota_sync(sdp->sd_vfs, 0);
@@ -580,15 +570,8 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
                                   gfs2_log_is_empty(sdp),
                                   HZ * 5);
                gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
-       } else {
-               wait_event_timeout(sdp->sd_log_waitq,
-                                  gfs2_log_is_empty(sdp),
-                                  HZ * 5);
        }
        gfs2_quota_cleanup(sdp);
-
-       if (!log_write_allowed)
-               sdp->sd_vfs->s_flags |= SB_RDONLY;
 }
 
 /**
@@ -622,6 +605,10 @@ restart:
        if (!sb_rdonly(sb)) {
                gfs2_make_fs_ro(sdp);
        }
+       if (gfs2_withdrawn(sdp)) {
+               gfs2_destroy_threads(sdp);
+               gfs2_quota_cleanup(sdp);
+       }
        WARN_ON(gfs2_withdrawing(sdp));
 
        /*  At this point, we're through modifying the disk  */
@@ -1134,6 +1121,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
                case GFS2_QUOTA_ON:
                        state = "on";
                        break;
+               case GFS2_QUOTA_QUIET:
+                       state = "quiet";
+                       break;
                default:
                        state = "unknown";
                        break;
index bba5862..ab9c831 100644 (file)
@@ -36,6 +36,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
 extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
 extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp);
 extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
+extern void gfs2_destroy_threads(struct gfs2_sbd *sdp);
 extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
 extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
                               s64 dinodes);
index c60bc7f..60a0206 100644 (file)
@@ -98,7 +98,10 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
                     "sd_log_flush_head:        %d\n"
                     "sd_log_flush_tail:        %d\n"
                     "sd_log_blks_reserved:     %d\n"
-                    "sd_log_revokes_available: %d\n",
+                    "sd_log_revokes_available: %d\n"
+                    "sd_log_pinned:            %d\n"
+                    "sd_log_thresh1:           %d\n"
+                    "sd_log_thresh2:           %d\n",
                     test_bit(SDF_JOURNAL_CHECKED, &f),
                     test_bit(SDF_JOURNAL_LIVE, &f),
                     (sdp->sd_jdesc ? sdp->sd_jdesc->jd_jid : 0),
@@ -118,7 +121,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
                     test_bit(SDF_WITHDRAW_IN_PROG, &f),
                     test_bit(SDF_REMOTE_WITHDRAW, &f),
                     test_bit(SDF_WITHDRAW_RECOVERY, &f),
-                    test_bit(SDF_DEACTIVATING, &f),
+                    test_bit(SDF_KILL, &f),
                     sdp->sd_log_error,
                     rwsem_is_locked(&sdp->sd_log_flush_lock),
                     sdp->sd_log_num_revoke,
@@ -128,7 +131,10 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf)
                     sdp->sd_log_flush_head,
                     sdp->sd_log_flush_tail,
                     sdp->sd_log_blks_reserved,
-                    atomic_read(&sdp->sd_log_revokes_available));
+                    atomic_read(&sdp->sd_log_revokes_available),
+                    atomic_read(&sdp->sd_log_pinned),
+                    atomic_read(&sdp->sd_log_thresh1),
+                    atomic_read(&sdp->sd_log_thresh2));
        return s;
 }
 
index dac22b1..da29faf 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/buffer_head.h>
+#include <linux/kthread.h>
 #include <linux/crc32.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/delay.h>
@@ -150,7 +151,14 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
        if (!sb_rdonly(sdp->sd_vfs)) {
                bool locked = mutex_trylock(&sdp->sd_freeze_mutex);
 
-               gfs2_make_fs_ro(sdp);
+               wake_up(&sdp->sd_logd_waitq);
+               wake_up(&sdp->sd_quota_wait);
+
+               wait_event_timeout(sdp->sd_log_waitq,
+                                  gfs2_log_is_empty(sdp),
+                                  HZ * 5);
+
+               sdp->sd_vfs->s_flags |= SB_RDONLY;
 
                if (locked)
                        mutex_unlock(&sdp->sd_freeze_mutex);
@@ -315,19 +323,19 @@ int gfs2_withdraw(struct gfs2_sbd *sdp)
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
        const struct lm_lockops *lm = ls->ls_ops;
 
-       if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
-           test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
-               if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
-                       return -1;
-
-               wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
-                           TASK_UNINTERRUPTIBLE);
-               return -1;
-       }
-
-       set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
-
        if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
+               unsigned long old = READ_ONCE(sdp->sd_flags), new;
+
+               do {
+                       if (old & BIT(SDF_WITHDRAWN)) {
+                               wait_on_bit(&sdp->sd_flags,
+                                           SDF_WITHDRAW_IN_PROG,
+                                           TASK_UNINTERRUPTIBLE);
+                               return -1;
+                       }
+                       new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG);
+               } while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new)));
+
                fs_err(sdp, "about to withdraw this file system\n");
                BUG_ON(sdp->sd_args.ar_debug);
 
index a0d0e2f..2a601af 100644 (file)
@@ -618,11 +618,6 @@ config NLS_UTF8
          the Unicode/ISO9646 universal character set.
 
 config NLS_UCS2_UTILS
-       tristate "NLS UCS-2 UTILS"
-       help
-         Set of older UCS-2 conversion utilities and tables used by some
-         filesystems including SMB/CIFS.  This includes upper case conversion
-         tables. This will automatically be selected when the filesystem
-         that uses it is selected.
+       tristate
 
 endif # NLS
index 5fffdde..cfec5e0 100644 (file)
@@ -571,12 +571,8 @@ static void init_once(void *foo)
 /*
  * Noinline to reduce binary size.
  */
-static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
+static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi)
 {
-       kfree(sbi->new_rec);
-       kvfree(ntfs_put_shared(sbi->upcase));
-       kfree(sbi->def_table);
-
        wnd_close(&sbi->mft.bitmap);
        wnd_close(&sbi->used.bitmap);
 
@@ -601,6 +597,13 @@ static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
        indx_clear(&sbi->security.index_sdh);
        indx_clear(&sbi->reparse.index_r);
        indx_clear(&sbi->objid.index_o);
+}
+
+static void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
+{
+       kfree(sbi->new_rec);
+       kvfree(ntfs_put_shared(sbi->upcase));
+       kfree(sbi->def_table);
        kfree(sbi->compress.lznt);
 #ifdef CONFIG_NTFS3_LZX_XPRESS
        xpress_free_decompressor(sbi->compress.xpress);
@@ -625,6 +628,7 @@ static void ntfs_put_super(struct super_block *sb)
 
        /* Mark rw ntfs as clear, if possible. */
        ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
+       ntfs3_put_sbi(sbi);
 }
 
 static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1644,8 +1648,10 @@ static void ntfs_fs_free(struct fs_context *fc)
        struct ntfs_mount_options *opts = fc->fs_private;
        struct ntfs_sb_info *sbi = fc->s_fs_info;
 
-       if (sbi)
+       if (sbi) {
+               ntfs3_put_sbi(sbi);
                ntfs3_free_sbi(sbi);
+       }
 
        if (opts)
                put_mount_options(opts);
index 0f2aa36..3dd5be9 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
 #include <linux/mount.h>
+#include <linux/ksm.h>
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
@@ -396,6 +397,7 @@ struct mem_size_stats {
        unsigned long swap;
        unsigned long shared_hugetlb;
        unsigned long private_hugetlb;
+       unsigned long ksm;
        u64 pss;
        u64 pss_anon;
        u64 pss_file;
@@ -452,6 +454,9 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
                        mss->lazyfree += size;
        }
 
+       if (PageKsm(page))
+               mss->ksm += size;
+
        mss->resident += size;
        /* Accumulate the size in pages that have been accessed. */
        if (young || page_is_young(page) || PageReferenced(page))
@@ -825,6 +830,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
        SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
        SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
        SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+       SEQ_PUT_DEC(" kB\nKSM:            ", mss->ksm);
        SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
        SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
        SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
index 2d5e9a9..b17f067 100644 (file)
@@ -18,7 +18,8 @@ static void smb2_close_cached_fid(struct kref *ref);
 
 static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
                                                    const char *path,
-                                                   bool lookup_only)
+                                                   bool lookup_only,
+                                                   __u32 max_cached_dirs)
 {
        struct cached_fid *cfid;
 
@@ -43,7 +44,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
                spin_unlock(&cfids->cfid_list_lock);
                return NULL;
        }
-       if (cfids->num_entries >= MAX_CACHED_FIDS) {
+       if (cfids->num_entries >= max_cached_dirs) {
                spin_unlock(&cfids->cfid_list_lock);
                return NULL;
        }
@@ -145,7 +146,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
        const char *npath;
 
        if (tcon == NULL || tcon->cfids == NULL || tcon->nohandlecache ||
-           is_smb1_server(tcon->ses->server))
+           is_smb1_server(tcon->ses->server) || (dir_cache_timeout == 0))
                return -EOPNOTSUPP;
 
        ses = tcon->ses;
@@ -162,7 +163,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
        if (!utf16_path)
                return -ENOMEM;
 
-       cfid = find_or_create_cached_dir(cfids, path, lookup_only);
+       cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs);
        if (cfid == NULL) {
                kfree(utf16_path);
                return -ENOENT;
@@ -582,7 +583,7 @@ cifs_cfids_laundromat_thread(void *p)
                        return 0;
                spin_lock(&cfids->cfid_list_lock);
                list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
-                       if (time_after(jiffies, cfid->time + HZ * 30)) {
+                       if (time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
                                list_del(&cfid->entry);
                                list_add(&cfid->entry, &entry);
                                cfids->num_entries--;
index facc9b1..a82ff2c 100644 (file)
@@ -49,7 +49,7 @@ struct cached_fid {
        struct cached_dirents dirents;
 };
 
-#define MAX_CACHED_FIDS 16
+/* default MAX_CACHED_FIDS is 16 */
 struct cached_fids {
        /* Must be held when:
         * - accessing the cfids->entries list
index 73c44e0..22869cd 100644 (file)
@@ -117,6 +117,10 @@ module_param(cifs_max_pending, uint, 0444);
 MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
                                   "CIFS/SMB1 dialect (N/A for SMB3) "
                                   "Default: 32767 Range: 2 to 32767.");
+unsigned int dir_cache_timeout = 30;
+module_param(dir_cache_timeout, uint, 0644);
+MODULE_PARM_DESC(dir_cache_timeout, "Number of seconds to cache directory contents for which we have a lease. Default: 30 "
+                                "Range: 1 to 65000 seconds, 0 to disable caching dir contents");
 #ifdef CONFIG_CIFS_STATS2
 unsigned int slow_rsp_threshold = 1;
 module_param(slow_rsp_threshold, uint, 0644);
@@ -695,6 +699,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
                seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
        if (tcon->handle_timeout)
                seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
+       if (tcon->max_cached_dirs != MAX_CACHED_FIDS)
+               seq_printf(s, ",max_cached_dirs=%u", tcon->max_cached_dirs);
 
        /*
         * Display file and directory attribute timeout in seconds.
@@ -1679,6 +1685,12 @@ init_cifs(void)
                         CIFS_MAX_REQ);
        }
 
+       /* Limit max to about 18 hours, and setting to zero disables directory entry caching */
+       if (dir_cache_timeout > 65000) {
+               dir_cache_timeout = 65000;
+               cifs_dbg(VFS, "dir_cache_timeout set to max of 65000 seconds\n");
+       }
+
        cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
        if (!cifsiod_wq) {
                rc = -ENOMEM;
index 532c38f..41daebd 100644 (file)
@@ -152,6 +152,6 @@ extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
 /* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 44
-#define CIFS_VERSION   "2.44"
+#define SMB3_PRODUCT_BUILD 45
+#define CIFS_VERSION   "2.45"
 #endif                         /* _CIFSFS_H */
index 259e231..032d871 100644 (file)
@@ -1210,6 +1210,7 @@ struct cifs_tcon {
        __u32 max_chunks;
        __u32 max_bytes_chunk;
        __u32 max_bytes_copy;
+       __u32 max_cached_dirs;
 #ifdef CONFIG_CIFS_FSCACHE
        u64 resource_id;                /* server resource id */
        struct fscache_volume *fscache; /* cookie for share */
@@ -2016,6 +2017,7 @@ extern unsigned int CIFSMaxBufSize;  /* max size not including hdr */
 extern unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
 extern unsigned int cifs_min_small;  /* min size of small buf pool */
 extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern unsigned int dir_cache_timeout; /* max time for directory lease caching of dir */
 extern bool disable_legacy_dialects;  /* forbid vers=1.0 and vers=2.0 mounts */
 extern atomic_t mid_count;
 
index 3bd71f9..6877547 100644 (file)
@@ -2657,6 +2657,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
        tcon->retry = ctx->retry;
        tcon->nocase = ctx->nocase;
        tcon->broken_sparse_sup = ctx->no_sparse;
+       tcon->max_cached_dirs = ctx->max_cached_dirs;
        if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)
                tcon->nohandlecache = ctx->nohandlecache;
        else
index 67e16c2..e45ce31 100644 (file)
@@ -150,6 +150,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
        fsparam_u32("closetimeo", Opt_closetimeo),
        fsparam_u32("echo_interval", Opt_echo_interval),
        fsparam_u32("max_credits", Opt_max_credits),
+       fsparam_u32("max_cached_dirs", Opt_max_cached_dirs),
        fsparam_u32("handletimeout", Opt_handletimeout),
        fsparam_u64("snapshot", Opt_snapshot),
        fsparam_u32("max_channels", Opt_max_channels),
@@ -1165,6 +1166,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
                if (result.uint_32 > 1)
                        ctx->multichannel = true;
                break;
+       case Opt_max_cached_dirs:
+               if (result.uint_32 < 1) {
+                       cifs_errorf(fc, "%s: Invalid max_cached_dirs, needs to be 1 or more\n",
+                                   __func__);
+                       goto cifs_parse_mount_err;
+               }
+               ctx->max_cached_dirs = result.uint_32;
+               break;
        case Opt_handletimeout:
                ctx->handle_timeout = result.uint_32;
                if (ctx->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) {
@@ -1592,7 +1601,7 @@ int smb3_init_fs_context(struct fs_context *fc)
        ctx->acregmax = CIFS_DEF_ACTIMEO;
        ctx->acdirmax = CIFS_DEF_ACTIMEO;
        ctx->closetimeo = SMB3_DEF_DCLOSETIMEO;
-
+       ctx->max_cached_dirs = MAX_CACHED_FIDS;
        /* Most clients set timeout to 0, allows server to use its default */
        ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
 
index f4eaf85..9d8d34a 100644 (file)
@@ -128,6 +128,7 @@ enum cifs_param {
        Opt_closetimeo,
        Opt_echo_interval,
        Opt_max_credits,
+       Opt_max_cached_dirs,
        Opt_snapshot,
        Opt_max_channels,
        Opt_handletimeout,
@@ -261,6 +262,7 @@ struct smb3_fs_context {
        __u32 handle_timeout; /* persistent and durable handle timeout in ms */
        unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
        unsigned int max_channels;
+       unsigned int max_cached_dirs;
        __u16 compression; /* compression algorithm 0xFFFF default 0=disabled */
        bool rootfs:1; /* if it's a SMB root file system */
        bool witness:1; /* use witness protocol */
@@ -287,7 +289,7 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
  */
 #define SMB3_MAX_DCLOSETIMEO (1 << 30)
 #define SMB3_DEF_DCLOSETIMEO (1 * HZ) /* even 1 sec enough to help eg open/write/close/open/read */
-
+#define MAX_CACHED_FIDS 16
 extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp);
 
 #endif
index 3677525..e5cad14 100644 (file)
@@ -48,7 +48,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
        sharename = extract_sharename(tcon->tree_name);
        if (IS_ERR(sharename)) {
                cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
-               return -EINVAL;
+               return PTR_ERR(sharename);
        }
 
        slen = strlen(sharename);
index e3dd698..d9eda2e 100644 (file)
@@ -2683,6 +2683,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
                smb2_copy_fs_info_to_kstatfs(info, buf);
 
 qfs_exit:
+       trace_smb3_qfs_done(xid, tcon->tid, tcon->ses->Suid, tcon->tree_name, rc);
        free_rsp_buf(buftype, rsp_iov.iov_base);
        return rc;
 }
index e671bd1..a7e4755 100644 (file)
@@ -691,7 +691,7 @@ DEFINE_EVENT(smb3_tcon_class, smb3_##name,    \
        TP_ARGS(xid, tid, sesid, unc_name, rc))
 
 DEFINE_SMB3_TCON_EVENT(tcon);
-
+DEFINE_SMB3_TCON_EVENT(qfs_done);
 
 /*
  * For smb2/smb3 open (including create and mkdir) calls
index 2680251..319fb9f 100644 (file)
@@ -406,7 +406,7 @@ struct smb2_tree_disconnect_rsp {
 /* Capabilities flags */
 #define SMB2_GLOBAL_CAP_DFS            0x00000001
 #define SMB2_GLOBAL_CAP_LEASING                0x00000002 /* Resp only New to SMB2.1 */
-#define SMB2_GLOBAL_CAP_LARGE_MTU      0X00000004 /* Resp only New to SMB2.1 */
+#define SMB2_GLOBAL_CAP_LARGE_MTU      0x00000004 /* Resp only New to SMB2.1 */
 #define SMB2_GLOBAL_CAP_MULTI_CHANNEL  0x00000008 /* New to SMB3 */
 #define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */
 #define SMB2_GLOBAL_CAP_DIRECTORY_LEASING  0x00000020 /* New to SMB3 */
index 793151d..cabe6a8 100644 (file)
@@ -1,5 +1,5 @@
 config SMB_SERVER
-       tristate "SMB3 server support (EXPERIMENTAL)"
+       tristate "SMB3 server support"
        depends on INET
        depends on MULTIUSER
        depends on FILE_LOCKING
index 801cd09..5ab2f52 100644 (file)
@@ -590,8 +590,6 @@ static int __init ksmbd_server_init(void)
        if (ret)
                goto err_crypto_destroy;
 
-       pr_warn_once("The ksmbd server is experimental\n");
-
        return 0;
 
 err_crypto_destroy:
index 136711a..6822ac7 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -311,6 +311,23 @@ int vfs_fstatat(int dfd, const char __user *filename,
        int statx_flags = flags | AT_NO_AUTOMOUNT;
        struct filename *name;
 
+       /*
+        * Work around glibc turning fstat() into fstatat(AT_EMPTY_PATH)
+        *
+        * If AT_EMPTY_PATH is the only flag set, we expect the common case
+        * to be an empty path, and avoid doing all the extra pathname work.
+        */
+       if (dfd >= 0 && flags == AT_EMPTY_PATH) {
+               char c;
+
+               ret = get_user(c, filename);
+               if (unlikely(ret))
+                       return ret;
+
+               if (likely(!c))
+                       return vfs_fstat(dfd, stat);
+       }
+
        name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL);
        ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS);
        putname(name);
diff --git a/include/asm-generic/ide_iops.h b/include/asm-generic/ide_iops.h
deleted file mode 100644 (file)
index 81dfa3e..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Generic I/O and MEMIO string operations.  */
-
-#define __ide_insw     insw
-#define __ide_insl     insl
-#define __ide_outsw    outsw
-#define __ide_outsl    outsl
-
-static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               *(u16 *)addr = readw(port);
-               addr += 2;
-       }
-}
-
-static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               *(u32 *)addr = readl(port);
-               addr += 4;
-       }
-}
-
-static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
-{
-       while (count--) {
-               writew(*(u16 *)addr, port);
-               addr += 2;
-       }
-}
-
-static __inline__ void __ide_mm_outsl(void __iomem * port, void *addr, u32 count)
-{
-       while (count--) {
-               writel(*(u32 *)addr, port);
-               addr += 4;
-       }
-}
index 847da6f..31029f4 100644 (file)
@@ -12,7 +12,7 @@
 
 #define ARMV8_PMU_CYCLE_IDX            (ARMV8_PMU_MAX_COUNTERS - 1)
 
-#ifdef CONFIG_HW_PERF_EVENTS
+#if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM)
 
 struct kvm_pmc {
        u8 idx; /* index into the pmu->pmc array */
@@ -74,6 +74,7 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
 struct kvm_pmu_events *kvm_get_pmu_events(void);
 void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
 void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
+void kvm_vcpu_pmu_resync_el0(void);
 
 #define kvm_vcpu_has_pmu(vcpu)                                 \
        (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
@@ -171,6 +172,7 @@ static inline u8 kvm_arm_pmu_get_pmuver_limit(void)
 {
        return 0;
 }
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
 
 #endif
 
index 6a3a9e1..51b1b70 100644 (file)
@@ -117,6 +117,8 @@ enum audit_nfcfgop {
        AUDIT_NFT_OP_OBJ_RESET,
        AUDIT_NFT_OP_FLOWTABLE_REGISTER,
        AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+       AUDIT_NFT_OP_SETELEM_RESET,
+       AUDIT_NFT_OP_RULE_RESET,
        AUDIT_NFT_OP_INVALID,
 };
 
index 12596af..024e8b2 100644 (file)
@@ -438,7 +438,7 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
 
        size /= sizeof(long);
        while (size--)
-               *ldst++ = *lsrc++;
+               data_race(*ldst++ = *lsrc++);
 }
 
 /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
index 49586ff..5f2301e 100644 (file)
@@ -359,14 +359,19 @@ enum {
 
 extern const char *ceph_mds_op_name(int op);
 
-
-#define CEPH_SETATTR_MODE   1
-#define CEPH_SETATTR_UID    2
-#define CEPH_SETATTR_GID    4
-#define CEPH_SETATTR_MTIME  8
-#define CEPH_SETATTR_ATIME 16
-#define CEPH_SETATTR_SIZE  32
-#define CEPH_SETATTR_CTIME 64
+#define CEPH_SETATTR_MODE              (1 << 0)
+#define CEPH_SETATTR_UID               (1 << 1)
+#define CEPH_SETATTR_GID               (1 << 2)
+#define CEPH_SETATTR_MTIME             (1 << 3)
+#define CEPH_SETATTR_ATIME             (1 << 4)
+#define CEPH_SETATTR_SIZE              (1 << 5)
+#define CEPH_SETATTR_CTIME             (1 << 6)
+#define CEPH_SETATTR_MTIME_NOW         (1 << 7)
+#define CEPH_SETATTR_ATIME_NOW         (1 << 8)
+#define CEPH_SETATTR_BTIME             (1 << 9)
+#define CEPH_SETATTR_KILL_SGUID        (1 << 10)
+#define CEPH_SETATTR_FSCRYPT_AUTH      (1 << 11)
+#define CEPH_SETATTR_FSCRYPT_FILE      (1 << 12)
 
 /*
  * Ceph setxattr request flags.
@@ -462,24 +467,26 @@ union ceph_mds_request_args {
 } __attribute__ ((packed));
 
 union ceph_mds_request_args_ext {
-       union ceph_mds_request_args old;
-       struct {
-               __le32 mode;
-               __le32 uid;
-               __le32 gid;
-               struct ceph_timespec mtime;
-               struct ceph_timespec atime;
-               __le64 size, old_size;       /* old_size needed by truncate */
-               __le32 mask;                 /* CEPH_SETATTR_* */
-               struct ceph_timespec btime;
-       } __attribute__ ((packed)) setattr_ext;
+       union {
+               union ceph_mds_request_args old;
+               struct {
+                       __le32 mode;
+                       __le32 uid;
+                       __le32 gid;
+                       struct ceph_timespec mtime;
+                       struct ceph_timespec atime;
+                       __le64 size, old_size;       /* old_size needed by truncate */
+                       __le32 mask;                 /* CEPH_SETATTR_* */
+                       struct ceph_timespec btime;
+               } __attribute__ ((packed)) setattr_ext;
+       };
 };
 
 #define CEPH_MDS_FLAG_REPLAY           1 /* this is a replayed op */
 #define CEPH_MDS_FLAG_WANT_DENTRY      2 /* want dentry in reply */
 #define CEPH_MDS_FLAG_ASYNC            4 /* request is asynchronous */
 
-struct ceph_mds_request_head_old {
+struct ceph_mds_request_head_legacy {
        __le64 oldest_client_tid;
        __le32 mdsmap_epoch;           /* on client */
        __le32 flags;                  /* CEPH_MDS_FLAG_* */
@@ -492,9 +499,9 @@ struct ceph_mds_request_head_old {
        union ceph_mds_request_args args;
 } __attribute__ ((packed));
 
-#define CEPH_MDS_REQUEST_HEAD_VERSION  1
+#define CEPH_MDS_REQUEST_HEAD_VERSION  2
 
-struct ceph_mds_request_head {
+struct ceph_mds_request_head_old {
        __le16 version;                /* struct version */
        __le64 oldest_client_tid;
        __le32 mdsmap_epoch;           /* on client */
@@ -508,6 +515,23 @@ struct ceph_mds_request_head {
        union ceph_mds_request_args_ext args;
 } __attribute__ ((packed));
 
+struct ceph_mds_request_head {
+       __le16 version;                /* struct version */
+       __le64 oldest_client_tid;
+       __le32 mdsmap_epoch;           /* on client */
+       __le32 flags;                  /* CEPH_MDS_FLAG_* */
+       __u8 num_retry, num_fwd;       /* legacy count retry and fwd attempts */
+       __le16 num_releases;           /* # include cap/lease release records */
+       __le32 op;                     /* mds op code */
+       __le32 caller_uid, caller_gid;
+       __le64 ino;                    /* use this ino for openc, mkdir, mknod,
+                                         etc. (if replaying) */
+       union ceph_mds_request_args_ext args;
+
+       __le32 ext_num_retry;          /* new count retry attempts */
+       __le32 ext_num_fwd;            /* new count fwd attempts */
+} __attribute__ ((packed));
+
 /* cap/lease release record */
 struct ceph_mds_request_release {
        __le64 ino, cap_id;            /* ino and unique cap id */
index 99c1726..2eaaabb 100644 (file)
@@ -17,6 +17,7 @@
 
 struct ceph_msg;
 struct ceph_connection;
+struct ceph_msg_data_cursor;
 
 /*
  * Ceph defines these callbacks for handling connection events.
@@ -70,6 +71,30 @@ struct ceph_connection_operations {
                                      int used_proto, int result,
                                      const int *allowed_protos, int proto_cnt,
                                      const int *allowed_modes, int mode_cnt);
+
+       /**
+        * sparse_read: read sparse data
+        * @con: connection we're reading from
+        * @cursor: data cursor for reading extents
+        * @buf: optional buffer to read into
+        *
+        * This should be called more than once, each time setting up to
+        * receive an extent into the current cursor position, and zeroing
+        * the holes between them.
+        *
+        * Returns amount of data to be read (in bytes), 0 if reading is
+        * complete, or -errno if there was an error.
+        *
+        * If @buf is set on a >0 return, then the data should be read into
+        * the provided buffer. Otherwise, it should be read into the cursor.
+        *
+        * The sparse read operation is expected to initialize the cursor
+        * with a length covering up to the end of the last extent.
+        */
+       int (*sparse_read)(struct ceph_connection *con,
+                          struct ceph_msg_data_cursor *cursor,
+                          char **buf);
+
 };
 
 /* use format string %s%lld */
@@ -98,6 +123,7 @@ enum ceph_msg_data_type {
        CEPH_MSG_DATA_BIO,      /* data source/destination is a bio list */
 #endif /* CONFIG_BLOCK */
        CEPH_MSG_DATA_BVECS,    /* data source/destination is a bio_vec array */
+       CEPH_MSG_DATA_ITER,     /* data source/destination is an iov_iter */
 };
 
 #ifdef CONFIG_BLOCK
@@ -199,6 +225,7 @@ struct ceph_msg_data {
                        bool            own_pages;
                };
                struct ceph_pagelist    *pagelist;
+               struct iov_iter         iter;
        };
 };
 
@@ -207,6 +234,7 @@ struct ceph_msg_data_cursor {
 
        struct ceph_msg_data    *data;          /* current data item */
        size_t                  resid;          /* bytes not yet consumed */
+       int                     sr_resid;       /* residual sparse_read len */
        bool                    need_crc;       /* crc update needed */
        union {
 #ifdef CONFIG_BLOCK
@@ -222,6 +250,10 @@ struct ceph_msg_data_cursor {
                        struct page     *page;          /* page from list */
                        size_t          offset;         /* bytes from list */
                };
+               struct {
+                       struct iov_iter         iov_iter;
+                       unsigned int            lastlen;
+               };
        };
 };
 
@@ -251,6 +283,7 @@ struct ceph_msg {
        struct kref kref;
        bool more_to_follow;
        bool needs_out_seq;
+       bool sparse_read;
        int front_alloc_len;
 
        struct ceph_msgpool *pool;
@@ -309,6 +342,10 @@ struct ceph_connection_v1_info {
 
        int in_base_pos;     /* bytes read */
 
+       /* sparse reads */
+       struct kvec in_sr_kvec; /* current location to receive into */
+       u64 in_sr_len;          /* amount of data in this extent */
+
        /* message in temps */
        u8 in_tag;           /* protocol control byte */
        struct ceph_msg_header in_hdr;
@@ -395,6 +432,7 @@ struct ceph_connection_v2_info {
 
        void *conn_bufs[16];
        int conn_buf_cnt;
+       int data_len_remain;
 
        struct kvec in_sign_kvecs[8];
        struct kvec out_sign_kvecs[8];
@@ -573,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 #endif /* CONFIG_BLOCK */
 void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
                             struct ceph_bvec_iter *bvec_pos);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+                           struct iov_iter *iter);
 
 struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
                               gfp_t flags, bool can_fail);
index fb6be72..bf98239 100644 (file)
@@ -29,14 +29,62 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
 
 #define CEPH_HOMELESS_OSD      -1
 
-/* a given osd we're communicating with */
+/*
+ * A single extent in a SPARSE_READ reply.
+ *
+ * Note that these come from the OSD as little-endian values. On BE arches,
+ * we convert them in-place after receipt.
+ */
+struct ceph_sparse_extent {
+       u64     off;
+       u64     len;
+} __packed;
+
+/* Sparse read state machine state values */
+enum ceph_sparse_read_state {
+       CEPH_SPARSE_READ_HDR    = 0,
+       CEPH_SPARSE_READ_EXTENTS,
+       CEPH_SPARSE_READ_DATA_LEN,
+       CEPH_SPARSE_READ_DATA,
+};
+
+/*
+ * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
+ * 64-bit offset/length pairs, and then all of the actual file data
+ * concatenated after it (sans holes).
+ *
+ * Unfortunately, we don't know how long the extent array is until we've
+ * started reading the data section of the reply. The caller should send down
+ * a destination buffer for the array, but we'll alloc one if it's too small
+ * or if the caller doesn't.
+ */
+struct ceph_sparse_read {
+       enum ceph_sparse_read_state     sr_state;    /* state machine state */
+       u64                             sr_req_off;  /* orig request offset */
+       u64                             sr_req_len;  /* orig request length */
+       u64                             sr_pos;      /* current pos in buffer */
+       int                             sr_index;    /* current extent index */
+       __le32                          sr_datalen;  /* length of actual data */
+       u32                             sr_count;    /* extent count in reply */
+       int                             sr_ext_len;  /* length of extent array */
+       struct ceph_sparse_extent       *sr_extent;  /* extent array */
+};
+
+/*
+ * A given osd we're communicating with.
+ *
+ * Note that the o_requests tree can be searched while holding the "lock" mutex
+ * or the "o_requests_lock" spinlock. Insertion or removal requires both!
+ */
 struct ceph_osd {
        refcount_t o_ref;
+       int o_sparse_op_idx;
        struct ceph_osd_client *o_osdc;
        int o_osd;
        int o_incarnation;
        struct rb_node o_node;
        struct ceph_connection o_con;
+       spinlock_t o_requests_lock;
        struct rb_root o_requests;
        struct rb_root o_linger_requests;
        struct rb_root o_backoff_mappings;
@@ -46,6 +94,7 @@ struct ceph_osd {
        unsigned long lru_ttl;
        struct list_head o_keepalive_item;
        struct mutex lock;
+       struct ceph_sparse_read o_sparse_read;
 };
 
 #define CEPH_OSD_SLAB_OPS      2
@@ -59,6 +108,7 @@ enum ceph_osd_data_type {
        CEPH_OSD_DATA_TYPE_BIO,
 #endif /* CONFIG_BLOCK */
        CEPH_OSD_DATA_TYPE_BVECS,
+       CEPH_OSD_DATA_TYPE_ITER,
 };
 
 struct ceph_osd_data {
@@ -82,6 +132,7 @@ struct ceph_osd_data {
                        struct ceph_bvec_iter   bvec_pos;
                        u32                     num_bvecs;
                };
+               struct iov_iter         iter;
        };
 };
 
@@ -98,6 +149,8 @@ struct ceph_osd_req_op {
                        u64 offset, length;
                        u64 truncate_size;
                        u32 truncate_seq;
+                       int sparse_ext_cnt;
+                       struct ceph_sparse_extent *sparse_ext;
                        struct ceph_osd_data osd_data;
                } extent;
                struct {
@@ -145,6 +198,9 @@ struct ceph_osd_req_op {
                        u32 src_fadvise_flags;
                        struct ceph_osd_data osd_data;
                } copy_from;
+               struct {
+                       u64 ver;
+               } assert_ver;
        };
 };
 
@@ -199,6 +255,7 @@ struct ceph_osd_request {
        struct ceph_osd_client *r_osdc;
        struct kref       r_kref;
        bool              r_mempool;
+       bool              r_linger;           /* don't resend on failure */
        struct completion r_completion;       /* private to osd_client.c */
        ceph_osdc_callback_t r_callback;
 
@@ -211,9 +268,9 @@ struct ceph_osd_request {
        struct ceph_snap_context *r_snapc;    /* for writes */
        struct timespec64 r_mtime;            /* ditto */
        u64 r_data_offset;                    /* ditto */
-       bool r_linger;                        /* don't resend on failure */
 
        /* internal */
+       u64 r_version;                        /* data version sent in reply */
        unsigned long r_stamp;                /* jiffies, send or check time */
        unsigned long r_start_stamp;          /* jiffies */
        ktime_t r_start_latency;              /* ktime_t */
@@ -450,6 +507,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
                                         unsigned int which,
                                         struct ceph_bvec_iter *bvec_pos);
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+                               unsigned int which, struct iov_iter *iter);
 
 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
                                        unsigned int which,
@@ -504,6 +563,20 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
                                      u32 truncate_seq, u64 truncate_size,
                                      bool use_mempool);
 
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
+
+/*
+ * How big an extent array should we preallocate for a sparse read? This is
+ * just a starting value.  If we get more than this back from the OSD, the
+ * receiver will reallocate.
+ */
+#define CEPH_SPARSE_EXT_ARRAY_INITIAL  16
+
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
+{
+       return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
+}
+
 extern void ceph_osdc_get_request(struct ceph_osd_request *req);
 extern void ceph_osdc_put_request(struct ceph_osd_request *req);
 
@@ -558,5 +631,19 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
                            struct ceph_object_locator *oloc,
                            struct ceph_watch_item **watchers,
                            u32 *num_watchers);
-#endif
 
+/* Find offset into the buffer of the end of the extent map */
+static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
+{
+       struct ceph_sparse_extent *ext;
+
+       /* No extents? No data */
+       if (op->extent.sparse_ext_cnt == 0)
+               return 0;
+
+       ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
+
+       return ext->off + ext->len - op->extent.offset;
+}
+
+#endif
index 43a7a15..73c3efb 100644 (file)
@@ -524,6 +524,10 @@ struct ceph_osd_op {
                        __le64 cookie;
                } __attribute__ ((packed)) notify;
                struct {
+                       __le64 unused;
+                       __le64 ver;
+               } __attribute__ ((packed)) assert_ver;
+               struct {
                        __le64 offset, length;
                        __le64 src_offset;
                } __attribute__ ((packed)) clonerange;
index d319566..7de11c7 100644 (file)
@@ -154,6 +154,8 @@ static inline int con_debug_leave(void)
  *                     receiving the printk spam for obvious reasons.
  * @CON_EXTENDED:      The console supports the extended output format of
  *                     /dev/kmesg which requires a larger output buffer.
+ * @CON_SUSPENDED:     Indicates if a console is suspended. If true, the
+ *                     printing callbacks must not be called.
  */
 enum cons_flags {
        CON_PRINTBUFFER         = BIT(0),
@@ -163,6 +165,7 @@ enum cons_flags {
        CON_ANYTIME             = BIT(4),
        CON_BRL                 = BIT(5),
        CON_EXTENDED            = BIT(6),
+       CON_SUSPENDED           = BIT(7),
 };
 
 /**
index 43b363a..71d186d 100644 (file)
@@ -141,6 +141,9 @@ struct cpufreq_policy {
         */
        bool                    dvfs_possible_from_any_cpu;
 
+       /* Per policy boost enabled flag. */
+       bool                    boost_enabled;
+
         /* Cached frequency lookup from cpufreq_driver_resolve_freq. */
        unsigned int cached_target_freq;
        unsigned int cached_resolved_idx;
index beed838..9911508 100644 (file)
@@ -50,7 +50,7 @@ extern struct module __this_module;
                __EXPORT_SYMBOL_REF(sym)        ASM_NL  \
        .previous
 
-#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS)
+#if defined(__DISABLE_EXPORTS)
 
 /*
  * Allow symbol exports to be disabled completely so that C code may
@@ -75,7 +75,7 @@ extern struct module __this_module;
        __ADDRESSABLE(sym)                                      \
        asm(__stringify(___EXPORT_SYMBOL(sym, license, ns)))
 
-#endif /* CONFIG_MODULES */
+#endif
 
 #ifdef DEFAULT_SYMBOL_NAMESPACE
 #define _EXPORT_SYMBOL(sym, license)   __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE))
index 0a221e7..07e3701 100644 (file)
@@ -63,7 +63,7 @@ struct gameport_driver {
 int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode);
 void gameport_close(struct gameport *gameport);
 
-#if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE))
+#if IS_REACHABLE(CONFIG_GAMEPORT)
 
 void __gameport_register_port(struct gameport *gameport, struct module *owner);
 /* use a define to avoid include chaining to get THIS_MODULE */
index 5883551..af8a771 100644 (file)
@@ -147,6 +147,7 @@ struct inet6_skb_parm {
 #define IP6SKB_JUMBOGRAM      128
 #define IP6SKB_SEG6          256
 #define IP6SKB_FAKEJUMBO      512
+#define IP6SKB_MULTIPATH      1024
 };
 
 #if defined(CONFIG_NET_L3_MASTER_DEV)
index 819b6bc..3df5499 100644 (file)
@@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
 int kasan_populate_early_shadow(const void *shadow_start,
                                const void *shadow_end);
 
+#ifndef __HAVE_ARCH_SHADOW_MAP
 static inline void *kasan_mem_to_shadow(const void *addr)
 {
        return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                + KASAN_SHADOW_OFFSET;
 }
+#endif
 
 int kasan_add_zero_shadow(void *start, unsigned long size);
 void kasan_remove_zero_shadow(void *start, unsigned long size);
index 9d3ac77..fb6c610 100644 (file)
@@ -190,8 +190,6 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
                                      struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
-                               unsigned long *vcpu_bitmap);
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID            0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID       1
@@ -256,11 +254,15 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+union kvm_mmu_notifier_arg {
+       pte_t pte;
+};
+
 struct kvm_gfn_range {
        struct kvm_memory_slot *slot;
        gfn_t start;
        gfn_t end;
-       pte_t pte;
+       union kvm_mmu_notifier_arg arg;
        bool may_block;
 };
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -865,6 +867,25 @@ static inline void kvm_vm_bugged(struct kvm *kvm)
        unlikely(__ret);                                        \
 })
 
+/*
+ * Note, "data corruption" refers to corruption of host kernel data structures,
+ * not guest data.  Guest data corruption, suspected or confirmed, that is tied
+ * and contained to a single VM should *never* BUG() and potentially panic the
+ * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure
+ * is corrupted and that corruption can have a cascading effect to other parts
+ * of the host and/or to other VMs.
+ */
+#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm)                  \
+({                                                             \
+       bool __ret = !!(cond);                                  \
+                                                               \
+       if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION))          \
+               BUG_ON(__ret);                                  \
+       else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged))      \
+               kvm_vm_bugged(kvm);                             \
+       unlikely(__ret);                                        \
+})
+
 static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_PROVE_RCU
@@ -1359,6 +1380,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot);
 
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
@@ -1387,10 +1411,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                                        unsigned long mask);
 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
 
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
-                                       const struct kvm_memory_slot *memslot);
-#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
 int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
                      int *is_dirty, struct kvm_memory_slot **memslot);
@@ -1479,11 +1500,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
        return -ENOTSUPP;
 }
+#else
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
+#endif
+
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+                                                   gfn_t gfn, u64 nr_pages)
+{
+       return -EOPNOTSUPP;
+}
+#else
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
 #endif
 
 #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
@@ -2148,8 +2181,6 @@ struct kvm_device_ops {
        int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
 };
 
-void kvm_device_get(struct kvm_device *dev);
-void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type);
 void kvm_unregister_device_ops(u32 type);
index 7d42810..aa16dc2 100644 (file)
@@ -100,6 +100,7 @@ struct led_classdev {
        const char              *name;
        unsigned int brightness;
        unsigned int max_brightness;
+       unsigned int color;
        int                      flags;
 
        /* Lower 16 bits reflect status */
@@ -313,6 +314,8 @@ extern struct led_classdev *of_led_get(struct device_node *np, int index);
 extern void led_put(struct led_classdev *led_cdev);
 struct led_classdev *__must_check devm_of_led_get(struct device *dev,
                                                  int index);
+struct led_classdev *__must_check devm_of_led_get_optional(struct device *dev,
+                                                 int index);
 
 /**
  * led_blink_set - set blinking with software fallback
index 820f7a3..52d58b1 100644 (file)
@@ -344,7 +344,6 @@ enum {
        ATA_LINK_RESUME_TRIES   = 5,
 
        /* how hard are we gonna try to probe/recover devices */
-       ATA_PROBE_MAX_TRIES     = 3,
        ATA_EH_DEV_TRIES        = 3,
        ATA_EH_PMP_TRIES        = 5,
        ATA_EH_PMP_LINK_TRIES   = 3,
@@ -977,12 +976,6 @@ struct ata_port_operations {
                                        ssize_t size);
 
        /*
-        * Obsolete
-        */
-       void (*phy_reset)(struct ata_port *ap);
-       void (*eng_timeout)(struct ata_port *ap);
-
-       /*
         * ->inherits must be the last field and all the preceding
         * fields must be pointers.
         */
@@ -1116,7 +1109,7 @@ static inline void ata_sas_port_resume(struct ata_port *ap)
 extern int ata_ratelimit(void);
 extern void ata_msleep(struct ata_port *ap, unsigned int msecs);
 extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask,
-                       u32 val, unsigned long interval, unsigned long timeout);
+                            u32 val, unsigned int interval, unsigned int timeout);
 extern int atapi_cmd_type(u8 opcode);
 extern unsigned int ata_pack_xfermask(unsigned int pio_mask,
                                      unsigned int mwdma_mask,
@@ -1166,11 +1159,11 @@ extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *
  * SATA specific code - drivers/ata/libata-sata.c
  */
 #ifdef CONFIG_SATA_HOST
-extern const unsigned long sata_deb_timing_normal[];
-extern const unsigned long sata_deb_timing_hotplug[];
-extern const unsigned long sata_deb_timing_long[];
+extern const unsigned int sata_deb_timing_normal[];
+extern const unsigned int sata_deb_timing_hotplug[];
+extern const unsigned int sata_deb_timing_long[];
 
-static inline const unsigned long *
+static inline const unsigned int *
 sata_ehc_deb_timing(struct ata_eh_context *ehc)
 {
        if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
@@ -1185,14 +1178,14 @@ extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
 extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
 extern int sata_set_spd(struct ata_link *link);
 extern int sata_link_hardreset(struct ata_link *link,
-                       const unsigned long *timing, unsigned long deadline,
+                       const unsigned int *timing, unsigned long deadline,
                        bool *online, int (*check_ready)(struct ata_link *));
-extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
+extern int sata_link_resume(struct ata_link *link, const unsigned int *params,
                            unsigned long deadline);
 extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link);
 extern void ata_eh_analyze_ncq_error(struct ata_link *link);
 #else
-static inline const unsigned long *
+static inline const unsigned int *
 sata_ehc_deb_timing(struct ata_eh_context *ehc)
 {
        return NULL;
@@ -1212,7 +1205,7 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val)
 }
 static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; }
 static inline int sata_link_hardreset(struct ata_link *link,
-                                     const unsigned long *timing,
+                                     const unsigned int *timing,
                                      unsigned long deadline,
                                      bool *online,
                                      int (*check_ready)(struct ata_link *))
@@ -1222,7 +1215,7 @@ static inline int sata_link_hardreset(struct ata_link *link,
        return -EOPNOTSUPP;
 }
 static inline int sata_link_resume(struct ata_link *link,
-                                  const unsigned long *params,
+                                  const unsigned int *params,
                                   unsigned long deadline)
 {
        return -EOPNOTSUPP;
@@ -1234,20 +1227,15 @@ static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link)
 static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { }
 #endif
 extern int sata_link_debounce(struct ata_link *link,
-                       const unsigned long *params, unsigned long deadline);
+                             const unsigned int *params, unsigned long deadline);
 extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
                             bool spm_wakeup);
 extern int ata_slave_link_init(struct ata_port *ap);
-extern void ata_sas_port_destroy(struct ata_port *);
 extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
                                           struct ata_port_info *, struct Scsi_Host *);
-extern void ata_sas_async_probe(struct ata_port *ap);
-extern int ata_sas_sync_probe(struct ata_port *ap);
-extern int ata_sas_port_init(struct ata_port *);
-extern int ata_sas_port_start(struct ata_port *ap);
+extern void ata_port_probe(struct ata_port *ap);
 extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
 extern void ata_sas_tport_delete(struct ata_port *ap);
-extern void ata_sas_port_stop(struct ata_port *ap);
 extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
 extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
 extern void ata_tf_to_fis(const struct ata_taskfile *tf,
@@ -1785,7 +1773,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap,
 {
        struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
 
-       if (unlikely(!qc) || !ap->ops->error_handler)
+       if (unlikely(!qc))
                return qc;
 
        if ((qc->flags & (ATA_QCFLAG_ACTIVE |
@@ -1876,7 +1864,7 @@ static inline int ata_check_ready(u8 status)
 }
 
 static inline unsigned long ata_deadline(unsigned long from_jiffies,
-                                        unsigned long timeout_msecs)
+                                        unsigned int timeout_msecs)
 {
        return from_jiffies + msecs_to_jiffies(timeout_msecs);
 }
index 473545a..6fa2179 100644 (file)
@@ -472,13 +472,7 @@ extern int pm860x_bulk_read(struct i2c_client *, int, int, unsigned char *);
 extern int pm860x_bulk_write(struct i2c_client *, int, int, unsigned char *);
 extern int pm860x_set_bits(struct i2c_client *, int, unsigned char,
                           unsigned char);
-extern int pm860x_page_reg_read(struct i2c_client *, int);
 extern int pm860x_page_reg_write(struct i2c_client *, int, unsigned char);
 extern int pm860x_page_bulk_read(struct i2c_client *, int, int,
                                 unsigned char *);
-extern int pm860x_page_bulk_write(struct i2c_client *, int, int,
-                                 unsigned char *);
-extern int pm860x_page_set_bits(struct i2c_client *, int, unsigned char,
-                               unsigned char);
-
 #endif /* __LINUX_MFD_88PM860X_H */
index 302a330..09fb3c5 100644 (file)
@@ -382,10 +382,6 @@ struct ab8500_platform_data {
        struct ab8500_sysctrl_platform_data *sysctrl;
 };
 
-extern int ab8500_init(struct ab8500 *ab8500,
-                                enum ab8500_version version);
-extern int ab8500_exit(struct ab8500 *ab8500);
-
 extern int ab8500_suspend(struct ab8500 *ab8500);
 
 static inline int is_ab8500(struct ab8500 *ab)
index e7a7e70..dd0fc89 100644 (file)
@@ -556,16 +556,6 @@ static inline void prcmu_clear(unsigned int reg, u32 bits)
 #define PRCMU_QOS_ARM_OPP 3
 #define PRCMU_QOS_DEFAULT_VALUE -1
 
-static inline unsigned long prcmu_qos_get_cpufreq_opp_delay(void)
-{
-       return 0;
-}
-
-static inline int prcmu_qos_requirement(int prcmu_qos_class)
-{
-       return 0;
-}
-
 static inline int prcmu_qos_add_requirement(int prcmu_qos_class,
                                            char *name, s32 value)
 {
@@ -582,15 +572,4 @@ static inline void prcmu_qos_remove_requirement(int prcmu_qos_class, char *name)
 {
 }
 
-static inline int prcmu_qos_add_notifier(int prcmu_qos_class,
-                                        struct notifier_block *notifier)
-{
-       return 0;
-}
-static inline int prcmu_qos_remove_notifier(int prcmu_qos_class,
-                                           struct notifier_block *notifier)
-{
-       return 0;
-}
-
 #endif /* __MACH_PRCMU_H */
index 6a01278..1945568 100644 (file)
@@ -52,7 +52,6 @@
 #define OTMP_D1R_INT_MASK               BIT(OTMP_D1R_INT)
 
 struct hi655x_pmic {
-       struct resource *res;
        struct device *dev;
        struct regmap *regmap;
        struct gpio_desc *gpio;
index 3acceee..ea635d1 100644 (file)
@@ -441,8 +441,4 @@ enum max77686_types {
        TYPE_MAX77802,
 };
 
-extern int max77686_irq_init(struct max77686_dev *max77686);
-extern void max77686_irq_exit(struct max77686_dev *max77686);
-extern int max77686_irq_resume(struct max77686_dev *max77686);
-
 #endif /*  __LINUX_MFD_MAX77686_PRIV_H */
index c5173bc..8421d49 100644 (file)
@@ -151,7 +151,6 @@ struct rz_mtu3 {
        void *priv_data;
 };
 
-#if IS_ENABLED(CONFIG_RZ_MTU3)
 static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch)
 {
        mutex_lock(&ch->lock);
@@ -188,70 +187,5 @@ void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val);
 void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val);
 void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch, u16 off,
                                   u16 pos, u8 val);
-#else
-static inline bool rz_mtu3_request_channel(struct rz_mtu3_channel *ch)
-{
-       return false;
-}
-
-static inline void rz_mtu3_release_channel(struct rz_mtu3_channel *ch)
-{
-}
-
-static inline bool rz_mtu3_is_enabled(struct rz_mtu3_channel *ch)
-{
-       return false;
-}
-
-static inline void rz_mtu3_disable(struct rz_mtu3_channel *ch)
-{
-}
-
-static inline int rz_mtu3_enable(struct rz_mtu3_channel *ch)
-{
-       return 0;
-}
-
-static inline u8 rz_mtu3_8bit_ch_read(struct rz_mtu3_channel *ch, u16 off)
-{
-       return 0;
-}
-
-static inline u16 rz_mtu3_16bit_ch_read(struct rz_mtu3_channel *ch, u16 off)
-{
-       return 0;
-}
-
-static inline u32 rz_mtu3_32bit_ch_read(struct rz_mtu3_channel *ch, u16 off)
-{
-       return 0;
-}
-
-static inline u16 rz_mtu3_shared_reg_read(struct rz_mtu3_channel *ch, u16 off)
-{
-       return 0;
-}
-
-static inline void rz_mtu3_8bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u8 val)
-{
-}
-
-static inline void rz_mtu3_16bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u16 val)
-{
-}
-
-static inline void rz_mtu3_32bit_ch_write(struct rz_mtu3_channel *ch, u16 off, u32 val)
-{
-}
-
-static inline void rz_mtu3_shared_reg_write(struct rz_mtu3_channel *ch, u16 off, u16 val)
-{
-}
-
-static inline void rz_mtu3_shared_reg_update_bit(struct rz_mtu3_channel *ch,
-                                                u16 off, u16 pos, u8 val)
-{
-}
-#endif
 
 #endif /* __MFD_RZ_MTU3_H__ */
index 8bef1ab..4e27ca7 100644 (file)
 #define        PHY_ID_KSZ9477          0x00221631
 
 /* struct phy_device dev_flags definitions */
-#define MICREL_PHY_50MHZ_CLK   0x00000001
-#define MICREL_PHY_FXEN                0x00000002
-#define MICREL_KSZ8_P1_ERRATA  0x00000003
+#define MICREL_PHY_50MHZ_CLK   BIT(0)
+#define MICREL_PHY_FXEN                BIT(1)
+#define MICREL_KSZ8_P1_ERRATA  BIT(2)
+#define MICREL_NO_EEE          BIT(3)
 
 #define MICREL_KSZ9021_EXTREG_CTRL     0xB
 #define MICREL_KSZ9021_EXTREG_DATA_WRITE       0xC
index 4109f1b..f6ef8cf 100644 (file)
 struct nvmefc_ls_req {
        void                    *rqstaddr;
        dma_addr_t              rqstdma;
-       u32                     rqstlen;
+       __le32                  rqstlen;
        void                    *rspaddr;
        dma_addr_t              rspdma;
-       u32                     rsplen;
+       __le32                  rsplen;
        u32                     timeout;
 
        void                    *private;
@@ -120,7 +120,7 @@ struct nvmefc_ls_req {
 struct nvmefc_ls_rsp {
        void            *rspbuf;
        dma_addr_t      rspdma;
-       u16             rsplen;
+       __le32          rsplen;
 
        void (*done)(struct nvmefc_ls_rsp *rsp);
        void            *nvme_fc_private;       /* LLDD is not to access !! */
index ed67981..6a9ddf2 100644 (file)
@@ -1676,8 +1676,8 @@ int of_overlay_notifier_unregister(struct notifier_block *nb);
 
 #else
 
-static inline int of_overlay_fdt_apply(void *overlay_fdt, u32 overlay_fdt_size,
-                                      int *ovcs_id)
+static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+                                      int *ovcs_id, struct device_node *target_base)
 {
        return -ENOTSUPP;
 }
index 0f4a890..f86a08b 100644 (file)
@@ -67,6 +67,7 @@ enum OID {
        OID_msOutlookExpress,           /* 1.3.6.1.4.1.311.16.4 */
 
        OID_ntlmssp,                    /* 1.3.6.1.4.1.311.2.2.10 */
+       OID_negoex,                     /* 1.3.6.1.4.1.311.2.2.30 */
 
        OID_spnego,                     /* 1.3.6.1.5.5.2 */
 
index 7d07f87..2b886ea 100644 (file)
@@ -600,7 +600,7 @@ void pcs_get_state(struct phylink_pcs *pcs,
  *
  * The %neg_mode argument should be tested via the phylink_mode_*() family of
  * functions, or for PCS that set pcs->neg_mode true, should be tested
- * against the %PHYLINK_PCS_NEG_* definitions.
+ * against the PHYLINK_PCS_NEG_* definitions.
  */
 int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
               phy_interface_t interface, const unsigned long *advertising,
@@ -630,7 +630,7 @@ void pcs_an_restart(struct phylink_pcs *pcs);
  *
  * The %mode argument should be tested via the phylink_mode_*() family of
  * functions, or for PCS that set pcs->neg_mode true, should be tested
- * against the %PHYLINK_PCS_NEG_* definitions.
+ * against the PHYLINK_PCS_NEG_* definitions.
  */
 void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
                 phy_interface_t interface, int speed, int duplex);
diff --git a/include/linux/platform_data/rtc-ds2404.h b/include/linux/platform_data/rtc-ds2404.h
deleted file mode 100644 (file)
index 22c5382..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * ds2404.h - platform data structure for the DS2404 RTC.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 Sven Schnelle <svens@stackframe.org>
- */
-
-#ifndef __LINUX_DS2404_H
-#define __LINUX_DS2404_H
-
-struct ds2404_platform_data {
-
-       unsigned int gpio_rst;
-       unsigned int gpio_clk;
-       unsigned int gpio_dq;
-};
-#endif
index 04ae1d9..d2f9f69 100644 (file)
@@ -298,7 +298,7 @@ struct pwm_chip {
        int base;
        unsigned int npwm;
 
-       struct pwm_device * (*of_xlate)(struct pwm_chip *pc,
+       struct pwm_device * (*of_xlate)(struct pwm_chip *chip,
                                        const struct of_phandle_args *args);
        unsigned int of_pwm_n_cells;
 
@@ -395,9 +395,9 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
                                         unsigned int index,
                                         const char *label);
 
-struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
+struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip,
                const struct of_phandle_args *args);
-struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc,
+struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip,
                                       const struct of_phandle_args *args);
 
 struct pwm_device *pwm_get(struct device *dev, const char *con_id);
index f29aaaf..006e18d 100644 (file)
@@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1;
 extern const struct raid6_calls raid6_vpermxor2;
 extern const struct raid6_calls raid6_vpermxor4;
 extern const struct raid6_calls raid6_vpermxor8;
+extern const struct raid6_calls raid6_lsx;
+extern const struct raid6_calls raid6_lasx;
 
 struct raid6_recov_calls {
        void (*data2)(int, size_t, int, int, void **);
@@ -123,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2;
 extern const struct raid6_recov_calls raid6_recov_avx512;
 extern const struct raid6_recov_calls raid6_recov_s390xc;
 extern const struct raid6_recov_calls raid6_recov_neon;
+extern const struct raid6_recov_calls raid6_recov_lsx;
+extern const struct raid6_recov_calls raid6_recov_lasx;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
index fe8978e..b479569 100644 (file)
@@ -690,6 +690,10 @@ int rproc_detach(struct rproc *rproc);
 int rproc_set_firmware(struct rproc *rproc, const char *fw_name);
 void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
 void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem);
+
+/* from remoteproc_coredump.c */
+void rproc_coredump_cleanup(struct rproc *rproc);
+void rproc_coredump(struct rproc *rproc);
 void rproc_coredump_using_sections(struct rproc *rproc);
 int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size);
 int rproc_coredump_add_custom_segment(struct rproc *rproc,
index a3825ce..51cc21e 100644 (file)
@@ -479,7 +479,6 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
 
 #define anon_vma_init()                do {} while (0)
 #define anon_vma_prepare(vma)  (0)
-#define anon_vma_link(vma)     do {} while (0)
 
 static inline int folio_referenced(struct folio *folio, int is_locked,
                                  struct mem_cgroup *memcg,
index 523c98b..90d8e44 100644 (file)
@@ -64,12 +64,14 @@ struct rpmsg_device {
 };
 
 typedef int (*rpmsg_rx_cb_t)(struct rpmsg_device *, void *, int, void *, u32);
+typedef int (*rpmsg_flowcontrol_cb_t)(struct rpmsg_device *, void *, bool);
 
 /**
  * struct rpmsg_endpoint - binds a local rpmsg address to its user
  * @rpdev: rpmsg channel device
  * @refcount: when this drops to zero, the ept is deallocated
  * @cb: rx callback handler
+ * @flow_cb: remote flow control callback handler
  * @cb_lock: must be taken before accessing/changing @cb
  * @addr: local rpmsg address
  * @priv: private data for the driver's use
@@ -92,6 +94,7 @@ struct rpmsg_endpoint {
        struct rpmsg_device *rpdev;
        struct kref refcount;
        rpmsg_rx_cb_t cb;
+       rpmsg_flowcontrol_cb_t flow_cb;
        struct mutex cb_lock;
        u32 addr;
        void *priv;
@@ -106,6 +109,7 @@ struct rpmsg_endpoint {
  * @probe: invoked when a matching rpmsg channel (i.e. device) is found
  * @remove: invoked when the rpmsg channel is removed
  * @callback: invoked when an inbound message is received on the channel
+ * @flowcontrol: invoked when remote side flow control request is received
  */
 struct rpmsg_driver {
        struct device_driver drv;
@@ -113,6 +117,7 @@ struct rpmsg_driver {
        int (*probe)(struct rpmsg_device *dev);
        void (*remove)(struct rpmsg_device *dev);
        int (*callback)(struct rpmsg_device *, void *, int, void *, u32);
+       int (*flowcontrol)(struct rpmsg_device *, void *, bool);
 };
 
 static inline u16 rpmsg16_to_cpu(struct rpmsg_device *rpdev, __rpmsg16 val)
@@ -192,6 +197,8 @@ __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
 
 ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept);
 
+int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst);
+
 #else
 
 static inline int rpmsg_register_device_override(struct rpmsg_device *rpdev,
@@ -316,6 +323,14 @@ static inline ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept)
        return -ENXIO;
 }
 
+static inline int rpmsg_set_flow_control(struct rpmsg_endpoint *ept, bool pause, u32 dst)
+{
+       /* This shouldn't be possible */
+       WARN_ON(1);
+
+       return -ENXIO;
+}
+
 #endif /* IS_ENABLED(CONFIG_RPMSG) */
 
 /* use a macro to avoid include chaining to get THIS_MODULE */
index 1fd9c6a..4c0bcbe 100644 (file)
@@ -146,6 +146,7 @@ struct rtc_device {
 
        time64_t range_min;
        timeu64_t range_max;
+       timeu64_t alarm_offset_max;
        time64_t start_secs;
        time64_t offset_secs;
        bool set_start_time;
index b0d36a9..5cf6f6f 100644 (file)
@@ -25,7 +25,6 @@ struct tca6416_keys_platform_data {
        unsigned int rep:1;     /* enable input subsystem auto repeat */
        uint16_t pinmask;
        uint16_t invert;
-       int irq_is_gpio;
        int use_polling;        /* use polling if Interrupt is not connected*/
 };
 #endif
index b449a46..c99440a 100644 (file)
@@ -53,6 +53,20 @@ enum thermal_notify_event {
        THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */
 };
 
+/**
+ * struct thermal_trip - representation of a point in temperature domain
+ * @temperature: temperature value in miliCelsius
+ * @hysteresis: relative hysteresis in miliCelsius
+ * @type: trip point type
+ * @priv: pointer to driver data associated with this trip
+ */
+struct thermal_trip {
+       int temperature;
+       int hysteresis;
+       enum thermal_trip_type type;
+       void *priv;
+};
+
 struct thermal_zone_device_ops {
        int (*bind) (struct thermal_zone_device *,
                     struct thermal_cooling_device *);
@@ -62,34 +76,16 @@ struct thermal_zone_device_ops {
        int (*set_trips) (struct thermal_zone_device *, int, int);
        int (*change_mode) (struct thermal_zone_device *,
                enum thermal_device_mode);
-       int (*get_trip_type) (struct thermal_zone_device *, int,
-               enum thermal_trip_type *);
-       int (*get_trip_temp) (struct thermal_zone_device *, int, int *);
        int (*set_trip_temp) (struct thermal_zone_device *, int, int);
-       int (*get_trip_hyst) (struct thermal_zone_device *, int, int *);
        int (*set_trip_hyst) (struct thermal_zone_device *, int, int);
        int (*get_crit_temp) (struct thermal_zone_device *, int *);
        int (*set_emul_temp) (struct thermal_zone_device *, int);
-       int (*get_trend) (struct thermal_zone_device *, int,
+       int (*get_trend) (struct thermal_zone_device *, struct thermal_trip *,
                          enum thermal_trend *);
        void (*hot)(struct thermal_zone_device *);
        void (*critical)(struct thermal_zone_device *);
 };
 
-/**
- * struct thermal_trip - representation of a point in temperature domain
- * @temperature: temperature value in miliCelsius
- * @hysteresis: relative hysteresis in miliCelsius
- * @type: trip point type
- * @priv: pointer to driver data associated with this trip
- */
-struct thermal_trip {
-       int temperature;
-       int hysteresis;
-       enum thermal_trip_type type;
-       void *priv;
-};
-
 struct thermal_cooling_device_ops {
        int (*get_max_state) (struct thermal_cooling_device *, unsigned long *);
        int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *);
@@ -304,16 +300,22 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp);
 #endif
 
 #ifdef CONFIG_THERMAL
-struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
-               void *, struct thermal_zone_device_ops *,
-               const struct thermal_zone_params *, int, int);
-
-void thermal_zone_device_unregister(struct thermal_zone_device *);
-
-struct thermal_zone_device *
-thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int,
-                                       void *, struct thermal_zone_device_ops *,
-                                       const struct thermal_zone_params *, int, int);
+struct thermal_zone_device *thermal_zone_device_register_with_trips(
+                                       const char *type,
+                                       struct thermal_trip *trips,
+                                       int num_trips, int mask,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp,
+                                       int passive_delay, int polling_delay);
+
+struct thermal_zone_device *thermal_tripless_zone_device_register(
+                                       const char *type,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp);
+
+void thermal_zone_device_unregister(struct thermal_zone_device *tz);
 
 void *thermal_zone_device_priv(struct thermal_zone_device *tzd);
 const char *thermal_zone_device_type(struct thermal_zone_device *tzd);
@@ -354,15 +356,26 @@ int thermal_zone_device_enable(struct thermal_zone_device *tz);
 int thermal_zone_device_disable(struct thermal_zone_device *tz);
 void thermal_zone_device_critical(struct thermal_zone_device *tz);
 #else
-static inline struct thermal_zone_device *thermal_zone_device_register(
-       const char *type, int trips, int mask, void *devdata,
-       struct thermal_zone_device_ops *ops,
-       const struct thermal_zone_params *tzp,
-       int passive_delay, int polling_delay)
+static inline struct thermal_zone_device *thermal_zone_device_register_with_trips(
+                                       const char *type,
+                                       struct thermal_trip *trips,
+                                       int num_trips, int mask,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp,
+                                       int passive_delay, int polling_delay)
 { return ERR_PTR(-ENODEV); }
-static inline void thermal_zone_device_unregister(
-       struct thermal_zone_device *tz)
+
+static inline struct thermal_zone_device *thermal_tripless_zone_device_register(
+                                       const char *type,
+                                       void *devdata,
+                                       struct thermal_zone_device_ops *ops,
+                                       const struct thermal_zone_params *tzp)
+{ return ERR_PTR(-ENODEV); }
+
+static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 { }
+
 static inline struct thermal_cooling_device *
 thermal_cooling_device_register(const char *type, void *devdata,
        const struct thermal_cooling_device_ops *ops)
index 1311a7f..4cc614a 100644 (file)
@@ -191,10 +191,8 @@ struct virtio_driver {
        void (*scan)(struct virtio_device *dev);
        void (*remove)(struct virtio_device *dev);
        void (*config_changed)(struct virtio_device *dev);
-#ifdef CONFIG_PM
        int (*freeze)(struct virtio_device *dev);
        int (*restore)(struct virtio_device *dev);
-#endif
 };
 
 static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
index 741703b..cb571df 100644 (file)
@@ -856,6 +856,9 @@ static inline int __must_check xa_insert_irq(struct xarray *xa,
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Any context.  Takes and releases the xa_lock.  May sleep if
  * the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -886,6 +889,9 @@ static inline __must_check int xa_alloc(struct xarray *xa, u32 *id,
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -916,6 +922,9 @@ static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id,
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Process context.  Takes and releases the xa_lock while
  * disabling interrupts.  May sleep if the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -949,6 +958,9 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id,
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Any context.  Takes and releases the xa_lock.  May sleep if
  * the @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the
@@ -983,6 +995,9 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the
@@ -1017,6 +1032,9 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Process context.  Takes and releases the xa_lock while
  * disabling interrupts.  May sleep if the @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the
index 19adacd..3489a1c 100644 (file)
@@ -57,6 +57,7 @@ struct inet_skb_parm {
 #define IPSKB_FRAG_PMTU                BIT(6)
 #define IPSKB_L3SLAVE          BIT(7)
 #define IPSKB_NOPOLICY         BIT(8)
+#define IPSKB_MULTIPATH                BIT(9)
 
        u16                     frag_max_size;
 };
@@ -94,7 +95,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
        ipcm_init(ipcm);
 
        ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
-       ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+       ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
        ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
        ipcm->addr = inet->inet_saddr;
        ipcm->protocol = inet->inet_num;
index c9ff23c..1ba9f4d 100644 (file)
@@ -642,7 +642,10 @@ static inline bool fib6_rules_early_flow_dissect(struct net *net,
        if (!net->ipv6.fib6_rules_require_fldissect)
                return false;
 
-       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       memset(flkeys, 0, sizeof(*flkeys));
+       __skb_flow_dissect(net, skb, &flow_keys_dissector,
+                          flkeys, NULL, 0, 0, 0, flag);
+
        fl6->fl6_sport = flkeys->ports.src;
        fl6->fl6_dport = flkeys->ports.dst;
        fl6->flowi6_proto = flkeys->basic.ip_proto;
index a378eff..f0c1386 100644 (file)
@@ -418,7 +418,10 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
        if (!net->ipv4.fib_rules_require_fldissect)
                return false;
 
-       skb_flow_dissect_flow_keys(skb, flkeys, flag);
+       memset(flkeys, 0, sizeof(*flkeys));
+       __skb_flow_dissect(net, skb, &flow_keys_dissector,
+                          flkeys, NULL, 0, 0, 0, flag);
+
        fl4->fl4_sport = flkeys->ports.src;
        fl4->fl4_dport = flkeys->ports.dst;
        fl4->flowi4_proto = flkeys->basic.ip_proto;
index e8750b4..f346b4e 100644 (file)
@@ -483,15 +483,14 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
                u64_stats_inc(&tstats->tx_packets);
                u64_stats_update_end(&tstats->syncp);
                put_cpu_ptr(tstats);
+               return;
+       }
+
+       if (pkt_len < 0) {
+               DEV_STATS_INC(dev, tx_errors);
+               DEV_STATS_INC(dev, tx_aborted_errors);
        } else {
-               struct net_device_stats *err_stats = &dev->stats;
-
-               if (pkt_len < 0) {
-                       err_stats->tx_errors++;
-                       err_stats->tx_aborted_errors++;
-               } else {
-                       err_stats->tx_dropped++;
-               }
+               DEV_STATS_INC(dev, tx_dropped);
        }
 }
 
index c5bcdf6..e8c76b4 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/sched/signal.h>
+#include <net/compat.h>
 
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
@@ -123,14 +124,17 @@ static inline bool scm_has_secdata(struct socket *sock)
 static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
 {
        struct file *pidfd_file = NULL;
-       int pidfd;
+       int len, pidfd;
 
-       /*
-        * put_cmsg() doesn't return an error if CMSG is truncated,
+       /* put_cmsg() doesn't return an error if CMSG is truncated,
         * that's why we need to opencode these checks here.
         */
-       if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
-           (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
+       if (msg->msg_flags & MSG_CMSG_COMPAT)
+               len = sizeof(struct compat_cmsghdr) + sizeof(int);
+       else
+               len = sizeof(struct cmsghdr) + sizeof(int);
+
+       if (msg->msg_controllen < len) {
                msg->msg_flags |= MSG_CTRUNC;
                return;
        }
index 11d5034..b770261 100644 (file)
@@ -1053,6 +1053,12 @@ static inline void sk_wmem_queued_add(struct sock *sk, int val)
        WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
 }
 
+static inline void sk_forward_alloc_add(struct sock *sk, int val)
+{
+       /* Paired with lockless reads of sk->sk_forward_alloc */
+       WRITE_ONCE(sk->sk_forward_alloc, sk->sk_forward_alloc + val);
+}
+
 void sk_stream_write_space(struct sock *sk);
 
 /* OOB backlog add */
@@ -1377,7 +1383,7 @@ static inline int sk_forward_alloc_get(const struct sock *sk)
        if (sk->sk_prot->forward_alloc_get)
                return sk->sk_prot->forward_alloc_get(sk);
 #endif
-       return sk->sk_forward_alloc;
+       return READ_ONCE(sk->sk_forward_alloc);
 }
 
 static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
@@ -1673,14 +1679,14 @@ static inline void sk_mem_charge(struct sock *sk, int size)
 {
        if (!sk_has_account(sk))
                return;
-       sk->sk_forward_alloc -= size;
+       sk_forward_alloc_add(sk, -size);
 }
 
 static inline void sk_mem_uncharge(struct sock *sk, int size)
 {
        if (!sk_has_account(sk))
                return;
-       sk->sk_forward_alloc += size;
+       sk_forward_alloc_add(sk, size);
        sk_mem_reclaim(sk);
 }
 
@@ -1900,7 +1906,9 @@ struct sockcm_cookie {
 static inline void sockcm_init(struct sockcm_cookie *sockc,
                               const struct sock *sk)
 {
-       *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
+       *sockc = (struct sockcm_cookie) {
+               .tsflags = READ_ONCE(sk->sk_tsflags)
+       };
 }
 
 int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
@@ -2695,9 +2703,9 @@ void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
 static inline void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 {
-       ktime_t kt = skb->tstamp;
        struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
-
+       u32 tsflags = READ_ONCE(sk->sk_tsflags);
+       ktime_t kt = skb->tstamp;
        /*
         * generate control messages if
         * - receive time stamping in software requested
@@ -2705,10 +2713,10 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
         * - hardware time stamps available and wanted
         */
        if (sock_flag(sk, SOCK_RCVTSTAMP) ||
-           (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
-           (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
+           (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
+           (kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
            (hwtstamps->hwtstamp &&
-            (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
+            (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
                __sock_recv_timestamp(msg, sk, skb);
        else
                sock_write_timestamp(sk, kt);
@@ -2730,7 +2738,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 #define TSFLAGS_ANY      (SOF_TIMESTAMPING_SOFTWARE                    | \
                           SOF_TIMESTAMPING_RAW_HARDWARE)
 
-       if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
+       if (sk->sk_flags & FLAGS_RECV_CMSGS ||
+           READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
                __sock_recv_cmsgs(msg, sk, skb);
        else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
                sock_write_timestamp(sk, skb->tstamp);
index a2b8d30..49f768d 100644 (file)
@@ -764,7 +764,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht);
 #define scsi_template_proc_dir(sht) NULL
 #endif
 extern void scsi_scan_host(struct Scsi_Host *);
-extern void scsi_rescan_device(struct device *);
+extern void scsi_rescan_device(struct scsi_device *);
 extern void scsi_remove_host(struct Scsi_Host *);
 extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
 extern int scsi_host_busy(struct Scsi_Host *shost);
index c9a8bce..d70c55f 100644 (file)
@@ -142,7 +142,7 @@ struct snd_dmaengine_pcm_config {
                        struct snd_pcm_substream *substream);
        int (*process)(struct snd_pcm_substream *substream,
                       int channel, unsigned long hwoff,
-                      struct iov_iter *buf, unsigned long bytes);
+                      unsigned long bytes);
        dma_filter_fn compat_filter_fn;
        struct device *dma_dev;
        const char *chan_names[SNDRV_PCM_STREAM_LAST + 1];
index 17bea31..ceca69b 100644 (file)
@@ -139,7 +139,7 @@ struct snd_soc_component_driver {
                struct snd_pcm_audio_tstamp_report *audio_tstamp_report);
        int (*copy)(struct snd_soc_component *component,
                    struct snd_pcm_substream *substream, int channel,
-                   unsigned long pos, struct iov_iter *buf,
+                   unsigned long pos, struct iov_iter *iter,
                    unsigned long bytes);
        struct page *(*page)(struct snd_soc_component *component,
                             struct snd_pcm_substream *substream,
@@ -511,7 +511,7 @@ int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream,
 int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream);
 int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                               int channel, unsigned long pos,
-                              struct iov_iter *buf, unsigned long bytes);
+                              struct iov_iter *iter, unsigned long bytes);
 struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
                                        unsigned long offset);
 int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
index ee0bcff..9b73197 100644 (file)
@@ -445,6 +445,8 @@ typedef struct elf64_shdr {
 #define NT_MIPS_DSP    0x800           /* MIPS DSP ASE registers */
 #define NT_MIPS_FP_MODE        0x801           /* MIPS floating-point mode */
 #define NT_MIPS_MSA    0x802           /* MIPS SIMD registers */
+#define NT_RISCV_CSR   0x900           /* RISC-V Control and Status Registers */
+#define NT_RISCV_VECTOR        0x901           /* RISC-V vector registers */
 #define NT_LOONGARCH_CPUCFG    0xa00   /* LoongArch CPU config registers */
 #define NT_LOONGARCH_CSR       0xa01   /* LoongArch control and status registers */
 #define NT_LOONGARCH_LSX       0xa02   /* LoongArch Loongson SIMD Extension registers */
index b3fcab1..db92a72 100644 (file)
  *  - add FUSE_EXT_GROUPS
  *  - add FUSE_CREATE_SUPP_GROUP
  *  - add FUSE_HAS_EXPIRE_ONLY
+ *
+ *  7.39
+ *  - add FUSE_DIRECT_IO_RELAX
+ *  - add FUSE_STATX and related structures
  */
 
 #ifndef _LINUX_FUSE_H
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 38
+#define FUSE_KERNEL_MINOR_VERSION 39
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -269,6 +273,40 @@ struct fuse_attr {
        uint32_t        flags;
 };
 
+/*
+ * The following structures are bit-for-bit compatible with the statx(2) ABI in
+ * Linux.
+ */
+struct fuse_sx_time {
+       int64_t         tv_sec;
+       uint32_t        tv_nsec;
+       int32_t         __reserved;
+};
+
+struct fuse_statx {
+       uint32_t        mask;
+       uint32_t        blksize;
+       uint64_t        attributes;
+       uint32_t        nlink;
+       uint32_t        uid;
+       uint32_t        gid;
+       uint16_t        mode;
+       uint16_t        __spare0[1];
+       uint64_t        ino;
+       uint64_t        size;
+       uint64_t        blocks;
+       uint64_t        attributes_mask;
+       struct fuse_sx_time     atime;
+       struct fuse_sx_time     btime;
+       struct fuse_sx_time     ctime;
+       struct fuse_sx_time     mtime;
+       uint32_t        rdev_major;
+       uint32_t        rdev_minor;
+       uint32_t        dev_major;
+       uint32_t        dev_minor;
+       uint64_t        __spare2[14];
+};
+
 struct fuse_kstatfs {
        uint64_t        blocks;
        uint64_t        bfree;
@@ -371,6 +409,8 @@ struct fuse_file_lock {
  * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
  *                     symlink and mknod (single group that matches parent)
  * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
+ * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now
+ *                       allow shared mmap
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -409,6 +449,7 @@ struct fuse_file_lock {
 #define FUSE_HAS_INODE_DAX     (1ULL << 33)
 #define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
 #define FUSE_HAS_EXPIRE_ONLY   (1ULL << 35)
+#define FUSE_DIRECT_IO_RELAX   (1ULL << 36)
 
 /**
  * CUSE INIT request/reply flags
@@ -575,6 +616,7 @@ enum fuse_opcode {
        FUSE_REMOVEMAPPING      = 49,
        FUSE_SYNCFS             = 50,
        FUSE_TMPFILE            = 51,
+       FUSE_STATX              = 52,
 
        /* CUSE specific operations */
        CUSE_INIT               = 4096,
@@ -639,6 +681,22 @@ struct fuse_attr_out {
        struct fuse_attr attr;
 };
 
+struct fuse_statx_in {
+       uint32_t        getattr_flags;
+       uint32_t        reserved;
+       uint64_t        fh;
+       uint32_t        sx_flags;
+       uint32_t        sx_mask;
+};
+
+struct fuse_statx_out {
+       uint64_t        attr_valid;     /* Cache timeout for the attributes */
+       uint32_t        attr_valid_nsec;
+       uint32_t        flags;
+       uint64_t        spare[2];
+       struct fuse_statx stat;
+};
+
 #define FUSE_COMPAT_MKNOD_IN_SIZE 8
 
 struct fuse_mknod_in {
index 8466c2a..ca30232 100644 (file)
@@ -263,6 +263,7 @@ enum nft_chain_attributes {
  * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
  * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32)
  * @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32)
+ * @NFTA_RULE_CHAIN_ID: add the rule to chain by ID, alternative to @NFTA_RULE_CHAIN (NLA_U32)
  */
 enum nft_rule_attributes {
        NFTA_RULE_UNSPEC,
index 1637e68..f0c8da2 100644 (file)
@@ -43,4 +43,14 @@ struct rpmsg_endpoint_info {
  */
 #define RPMSG_RELEASE_DEV_IOCTL        _IOW(0xb5, 0x4, struct rpmsg_endpoint_info)
 
+/**
+ * Get the flow control state of the remote rpmsg char device.
+ */
+#define RPMSG_GET_OUTGOING_FLOWCONTROL _IOR(0xb5, 0x5, int)
+
+/**
+ * Set the flow control state of the local rpmsg char device.
+ */
+#define RPMSG_SET_INCOMING_FLOWCONTROL _IOR(0xb5, 0x6, int)
+
 #endif
index 7c7975f..03f2bea 100644 (file)
@@ -83,7 +83,7 @@ struct utp_upiu_header {
                        union {
                                __u8 tm_function;
                                __u8 query_function;
-                       };
+                       } __attribute__((packed));
                        __u8 response;
                        __u8 status;
                        __u8 ehs_length;
index 300455b..c536788 100644 (file)
@@ -93,6 +93,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
                struct io_uring_sqe *sqe;
                unsigned int sq_idx;
 
+               if (ctx->flags & IORING_SETUP_NO_SQARRAY)
+                       break;
                sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
                if (sq_idx > sq_mask)
                        continue;
index 62f3455..1ecc8c7 100644 (file)
@@ -174,6 +174,16 @@ static void io_worker_ref_put(struct io_wq *wq)
                complete(&wq->worker_done);
 }
 
+bool io_wq_worker_stopped(void)
+{
+       struct io_worker *worker = current->worker_private;
+
+       if (WARN_ON_ONCE(!io_wq_current_is_worker()))
+               return true;
+
+       return test_bit(IO_WQ_BIT_EXIT, &worker->wq->state);
+}
+
 static void io_worker_cancel_cb(struct io_worker *worker)
 {
        struct io_wq_acct *acct = io_wq_get_acct(worker);
index 06d9ca9..2b2a640 100644 (file)
@@ -52,6 +52,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val);
 
 int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
 int io_wq_max_workers(struct io_wq *wq, int *new_count);
+bool io_wq_worker_stopped(void);
 
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
index e767535..783ed0f 100644 (file)
@@ -150,6 +150,31 @@ static void io_queue_sqe(struct io_kiocb *req);
 
 struct kmem_cache *req_cachep;
 
+static int __read_mostly sysctl_io_uring_disabled;
+static int __read_mostly sysctl_io_uring_group = -1;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table kernel_io_uring_disabled_table[] = {
+       {
+               .procname       = "io_uring_disabled",
+               .data           = &sysctl_io_uring_disabled,
+               .maxlen         = sizeof(sysctl_io_uring_disabled),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_TWO,
+       },
+       {
+               .procname       = "io_uring_group",
+               .data           = &sysctl_io_uring_group,
+               .maxlen         = sizeof(gid_t),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {},
+};
+#endif
+
 struct sock *io_uring_get_socket(struct file *file)
 {
 #if defined(CONFIG_UNIX)
@@ -883,7 +908,7 @@ static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
                struct io_uring_cqe *cqe = &ctx->completion_cqes[i];
 
                if (!io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags)) {
-                       if (ctx->task_complete) {
+                       if (ctx->lockless_cq) {
                                spin_lock(&ctx->completion_lock);
                                io_cqring_event_overflow(ctx, cqe->user_data,
                                                        cqe->res, cqe->flags, 0, 0);
@@ -1541,7 +1566,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 
                if (!(req->flags & REQ_F_CQE_SKIP) &&
                    unlikely(!io_fill_cqe_req(ctx, req))) {
-                       if (ctx->task_complete) {
+                       if (ctx->lockless_cq) {
                                spin_lock(&ctx->completion_lock);
                                io_req_cqe_overflow(req);
                                spin_unlock(&ctx->completion_lock);
@@ -1950,6 +1975,8 @@ fail:
                if (!needs_poll) {
                        if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
                                break;
+                       if (io_wq_worker_stopped())
+                               break;
                        cond_resched();
                        continue;
                }
@@ -4038,9 +4065,30 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
        return io_uring_create(entries, &p, params);
 }
 
+static inline bool io_uring_allowed(void)
+{
+       int disabled = READ_ONCE(sysctl_io_uring_disabled);
+       kgid_t io_uring_group;
+
+       if (disabled == 2)
+               return false;
+
+       if (disabled == 0 || capable(CAP_SYS_ADMIN))
+               return true;
+
+       io_uring_group = make_kgid(&init_user_ns, sysctl_io_uring_group);
+       if (!gid_valid(io_uring_group))
+               return false;
+
+       return in_group_p(io_uring_group);
+}
+
 SYSCALL_DEFINE2(io_uring_setup, u32, entries,
                struct io_uring_params __user *, params)
 {
+       if (!io_uring_allowed())
+               return -EPERM;
+
        return io_uring_setup(entries, params);
 }
 
@@ -4634,6 +4682,10 @@ static int __init io_uring_init(void)
                                offsetof(struct io_kiocb, cmd.data),
                                sizeof_field(struct io_kiocb, cmd.data), NULL);
 
+#ifdef CONFIG_SYSCTL
+       register_sysctl_init("kernel", kernel_io_uring_disabled_table);
+#endif
+
        return 0;
 };
 __initcall(io_uring_init);
index ee2d2c6..bd6c2c7 100644 (file)
@@ -430,7 +430,9 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx,
 
        if (sqd) {
                io_sq_thread_park(sqd);
-               ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
+               /* Don't set affinity for a dying thread */
+               if (sqd->thread)
+                       ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
                io_sq_thread_unpark(sqd);
        }
 
index b0cb763..21d2fa8 100644 (file)
@@ -143,6 +143,8 @@ static const struct audit_nfcfgop_tab audit_nfcfgs[] = {
        { AUDIT_NFT_OP_OBJ_RESET,               "nft_reset_obj"            },
        { AUDIT_NFT_OP_FLOWTABLE_REGISTER,      "nft_register_flowtable"   },
        { AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,    "nft_unregister_flowtable" },
+       { AUDIT_NFT_OP_SETELEM_RESET,           "nft_reset_setelem"        },
+       { AUDIT_NFT_OP_RULE_RESET,              "nft_reset_rule"           },
        { AUDIT_NFT_OP_INVALID,                 "nft_invalid"              },
 };
 
index b5149cf..146824c 100644 (file)
@@ -553,7 +553,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                         void *value, u64 map_flags, gfp_t gfp_flags)
 {
        struct bpf_local_storage_data *old_sdata = NULL;
-       struct bpf_local_storage_elem *selem = NULL;
+       struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
        struct bpf_local_storage *local_storage;
        unsigned long flags;
        int err;
@@ -607,11 +607,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                }
        }
 
-       if (gfp_flags == GFP_KERNEL) {
-               selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
-               if (!selem)
-                       return ERR_PTR(-ENOMEM);
-       }
+       /* A lookup has just been done before and concluded a new selem is
+        * needed. The chance of an unnecessary alloc is unlikely.
+        */
+       alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+       if (!alloc_selem)
+               return ERR_PTR(-ENOMEM);
 
        raw_spin_lock_irqsave(&local_storage->lock, flags);
 
@@ -623,13 +624,13 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                 * simple.
                 */
                err = -EAGAIN;
-               goto unlock_err;
+               goto unlock;
        }
 
        old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
        err = check_flags(old_sdata, map_flags);
        if (err)
-               goto unlock_err;
+               goto unlock;
 
        if (old_sdata && (map_flags & BPF_F_LOCK)) {
                copy_map_value_locked(&smap->map, old_sdata->data, value,
@@ -638,23 +639,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                goto unlock;
        }
 
-       if (gfp_flags != GFP_KERNEL) {
-               /* local_storage->lock is held.  Hence, we are sure
-                * we can unlink and uncharge the old_sdata successfully
-                * later.  Hence, instead of charging the new selem now
-                * and then uncharge the old selem later (which may cause
-                * a potential but unnecessary charge failure),  avoid taking
-                * a charge at all here (the "!old_sdata" check) and the
-                * old_sdata will not be uncharged later during
-                * bpf_selem_unlink_storage_nolock().
-                */
-               selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
-               if (!selem) {
-                       err = -ENOMEM;
-                       goto unlock_err;
-               }
-       }
-
+       alloc_selem = NULL;
        /* First, link the new selem to the map */
        bpf_selem_link_map(smap, selem);
 
@@ -665,20 +650,16 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
        if (old_sdata) {
                bpf_selem_unlink_map(SELEM(old_sdata));
                bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
-                                               false, false);
+                                               true, false);
        }
 
 unlock:
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-       return SDATA(selem);
-
-unlock_err:
-       raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-       if (selem) {
+       if (alloc_selem) {
                mem_uncharge(smap, owner, smap->elem_size);
-               bpf_selem_free(selem, smap, true);
+               bpf_selem_free(alloc_selem, smap, true);
        }
-       return ERR_PTR(err);
+       return err ? ERR_PTR(err) : SDATA(selem);
 }
 
 static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
@@ -779,7 +760,7 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
                 * of the loop will set the free_cgroup_storage to true.
                 */
                free_storage = bpf_selem_unlink_storage_nolock(
-                       local_storage, selem, false, true);
+                       local_storage, selem, true, true);
        }
        raw_spin_unlock_irqrestore(&local_storage->lock, flags);
 
index 0f8f036..4e3ce05 100644 (file)
@@ -870,7 +870,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins
                       GFP_KERNEL);
        if (!pack)
                return NULL;
-       pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
+       pack->ptr = bpf_jit_alloc_exec(BPF_PROG_PACK_SIZE);
        if (!pack->ptr) {
                kfree(pack);
                return NULL;
@@ -894,7 +894,7 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
        mutex_lock(&pack_mutex);
        if (size > BPF_PROG_PACK_SIZE) {
                size = round_up(size, PAGE_SIZE);
-               ptr = module_alloc(size);
+               ptr = bpf_jit_alloc_exec(size);
                if (ptr) {
                        bpf_fill_ill_insns(ptr, size);
                        set_vm_flush_reset_perms(ptr);
@@ -932,7 +932,7 @@ void bpf_prog_pack_free(struct bpf_binary_header *hdr)
 
        mutex_lock(&pack_mutex);
        if (hdr->size > BPF_PROG_PACK_SIZE) {
-               module_memfree(hdr);
+               bpf_jit_free_exec(hdr);
                goto out;
        }
 
@@ -956,7 +956,7 @@ void bpf_prog_pack_free(struct bpf_binary_header *hdr)
        if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
                                       BPF_PROG_CHUNK_COUNT, 0) == 0) {
                list_del(&pack->list);
-               module_memfree(pack->ptr);
+               bpf_jit_free_exec(pack->ptr);
                kfree(pack);
        }
 out:
index ebeb069..eb01c31 100644 (file)
@@ -5502,9 +5502,9 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
                }
 
                run_ctx.bpf_cookie = 0;
-               run_ctx.saved_run_ctx = NULL;
                if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
                        /* recursion detected */
+                       __bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
                        bpf_prog_put(prog);
                        return -EBUSY;
                }
index 78acf28..53ff50c 100644 (file)
@@ -926,13 +926,12 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
        migrate_disable();
        might_fault();
 
+       run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
        if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
                bpf_prog_inc_misses_counter(prog);
                return 0;
        }
-
-       run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
-
        return bpf_prog_start_time();
 }
 
index e8db8d9..4722b99 100644 (file)
@@ -1,3 +1,5 @@
+# Help: Debugging for CI systems and finding regressions
+#
 # The config is based on running daily CI for enterprise Linux distros to
 # seek regressions on linux-next builds on different bare-metal and virtual
 # platforms. It can be used for example,
index 208481d..d087706 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Bootable as a KVM guest
 CONFIG_NET=y
 CONFIG_NET_CORE=y
 CONFIG_NETDEVICES=y
index 81ff078..ebfdc3d 100644 (file)
@@ -1,3 +1,5 @@
+# Help: Disable Power Management
+
 CONFIG_PM=n
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
index 38a7c53..2c6e001 100644 (file)
@@ -1 +1,2 @@
+# Help: Enable Rust
 CONFIG_RUST=y
index 6fac5b4..35f4867 100644 (file)
@@ -1,3 +1,4 @@
+# Help: Debugging options for tip tree testing
 CONFIG_X86_DEBUG_FPU=y
 CONFIG_LOCK_STAT=y
 CONFIG_DEBUG_VM=y
index 436f806..6878b9a 100644 (file)
@@ -1,3 +1,5 @@
+# Help: Bootable as a Xen guest
+#
 # global stuff - these enable us to allow some
 # of the not so generic stuff below for xen
 CONFIG_PARAVIRT=y
index 813cb6c..9443bc6 100644 (file)
@@ -590,6 +590,8 @@ static void kdb_msg_write(const char *msg, int msg_len)
                        continue;
                if (c == dbg_io_ops->cons)
                        continue;
+               if (!c->write)
+                       continue;
                /*
                 * Set oops_in_progress to encourage the console drivers to
                 * disregard their internal spin locks: in the current calling
index 4c1e9a3..f488997 100644 (file)
@@ -160,7 +160,7 @@ if  DMA_CMA
 
 config DMA_NUMA_CMA
        bool "Enable separate DMA Contiguous Memory Area for NUMA Node"
-       default NUMA
+       depends on NUMA
        help
          Enable this option to get numa CMA areas so that NUMA devices
          can get local memory by DMA coherent APIs.
index 88c595e..f005c66 100644 (file)
@@ -473,11 +473,6 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
                return -EBUSY;
        }
 
-       if (memblock_is_region_reserved(rmem->base, rmem->size)) {
-               pr_info("Reserved memory: overlap with other memblock reserved region\n");
-               return -EBUSY;
-       }
-
        if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
            of_get_flat_dt_prop(node, "no-map", NULL))
                return -EINVAL;
index f190651..06366ac 100644 (file)
@@ -637,15 +637,19 @@ static struct dma_debug_entry *__dma_entry_alloc(void)
        return entry;
 }
 
-static void __dma_entry_alloc_check_leak(void)
+/*
+ * This should be called outside of free_entries_lock scope to avoid potential
+ * deadlocks with serial consoles that use DMA.
+ */
+static void __dma_entry_alloc_check_leak(u32 nr_entries)
 {
-       u32 tmp = nr_total_entries % nr_prealloc_entries;
+       u32 tmp = nr_entries % nr_prealloc_entries;
 
        /* Shout each time we tick over some multiple of the initial pool */
        if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) {
                pr_info("dma_debug_entry pool grown to %u (%u00%%)\n",
-                       nr_total_entries,
-                       (nr_total_entries / nr_prealloc_entries));
+                       nr_entries,
+                       (nr_entries / nr_prealloc_entries));
        }
 }
 
@@ -656,8 +660,10 @@ static void __dma_entry_alloc_check_leak(void)
  */
 static struct dma_debug_entry *dma_entry_alloc(void)
 {
+       bool alloc_check_leak = false;
        struct dma_debug_entry *entry;
        unsigned long flags;
+       u32 nr_entries;
 
        spin_lock_irqsave(&free_entries_lock, flags);
        if (num_free_entries == 0) {
@@ -667,13 +673,17 @@ static struct dma_debug_entry *dma_entry_alloc(void)
                        pr_err("debugging out of memory - disabling\n");
                        return NULL;
                }
-               __dma_entry_alloc_check_leak();
+               alloc_check_leak = true;
+               nr_entries = nr_total_entries;
        }
 
        entry = __dma_entry_alloc();
 
        spin_unlock_irqrestore(&free_entries_lock, flags);
 
+       if (alloc_check_leak)
+               __dma_entry_alloc_check_leak(nr_entries);
+
 #ifdef CONFIG_STACKTRACE
        entry->stack_len = stack_trace_save(entry->stack_entries,
                                            ARRAY_SIZE(entry->stack_entries),
index 1acec2e..b481c48 100644 (file)
@@ -135,9 +135,9 @@ encrypt_mapping:
 remove_mapping:
 #ifdef CONFIG_DMA_DIRECT_REMAP
        dma_common_free_remap(addr, pool_size);
-#endif
-free_page: __maybe_unused
+free_page:
        __free_pages(page, order);
+#endif
 out:
        return ret;
 }
index 2a17704..7d4979d 100644 (file)
@@ -103,3 +103,5 @@ struct printk_message {
        u64                     seq;
        unsigned long           dropped;
 };
+
+bool other_cpu_in_panic(void);
index 357a4d1..7e0b4dd 100644 (file)
@@ -88,7 +88,7 @@ EXPORT_SYMBOL(oops_in_progress);
 static DEFINE_MUTEX(console_mutex);
 
 /*
- * console_sem protects updates to console->seq and console_suspended,
+ * console_sem protects updates to console->seq
  * and also provides serialization for console printing.
  */
 static DEFINE_SEMAPHORE(console_sem, 1);
@@ -361,7 +361,7 @@ static bool panic_in_progress(void)
  * paths in the console code where we end up in places I want
  * locked without the console semaphore held).
  */
-static int console_locked, console_suspended;
+static int console_locked;
 
 /*
  *     Array of consoles built from command line options (console=)
@@ -2308,7 +2308,11 @@ asmlinkage int vprintk_emit(int facility, int level,
                preempt_enable();
        }
 
-       wake_up_klogd();
+       if (in_sched)
+               defer_console_output();
+       else
+               wake_up_klogd();
+
        return printed_len;
 }
 EXPORT_SYMBOL(vprintk_emit);
@@ -2547,22 +2551,46 @@ MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to hig
  */
 void suspend_console(void)
 {
+       struct console *con;
+
        if (!console_suspend_enabled)
                return;
        pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
        pr_flush(1000, true);
-       console_lock();
-       console_suspended = 1;
-       up_console_sem();
+
+       console_list_lock();
+       for_each_console(con)
+               console_srcu_write_flags(con, con->flags | CON_SUSPENDED);
+       console_list_unlock();
+
+       /*
+        * Ensure that all SRCU list walks have completed. All printing
+        * contexts must be able to see that they are suspended so that it
+        * is guaranteed that all printing has stopped when this function
+        * completes.
+        */
+       synchronize_srcu(&console_srcu);
 }
 
 void resume_console(void)
 {
+       struct console *con;
+
        if (!console_suspend_enabled)
                return;
-       down_console_sem();
-       console_suspended = 0;
-       console_unlock();
+
+       console_list_lock();
+       for_each_console(con)
+               console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED);
+       console_list_unlock();
+
+       /*
+        * Ensure that all SRCU list walks have completed. All printing
+        * contexts must be able to see they are no longer suspended so
+        * that they are guaranteed to wake up and resume printing.
+        */
+       synchronize_srcu(&console_srcu);
+
        pr_flush(1000, true);
 }
 
@@ -2585,6 +2613,26 @@ static int console_cpu_notify(unsigned int cpu)
        return 0;
 }
 
+/*
+ * Return true if a panic is in progress on a remote CPU.
+ *
+ * On true, the local CPU should immediately release any printing resources
+ * that may be needed by the panic CPU.
+ */
+bool other_cpu_in_panic(void)
+{
+       if (!panic_in_progress())
+               return false;
+
+       /*
+        * We can use raw_smp_processor_id() here because it is impossible for
+        * the task to be migrated to the panic_cpu, or away from it. If
+        * panic_cpu has already been set, and we're not currently executing on
+        * that CPU, then we never will be.
+        */
+       return atomic_read(&panic_cpu) != raw_smp_processor_id();
+}
+
 /**
  * console_lock - block the console subsystem from printing
  *
@@ -2597,9 +2645,11 @@ void console_lock(void)
 {
        might_sleep();
 
+       /* On panic, the console_lock must be left to the panic cpu. */
+       while (other_cpu_in_panic())
+               msleep(1000);
+
        down_console_sem();
-       if (console_suspended)
-               return;
        console_locked = 1;
        console_may_schedule = 1;
 }
@@ -2615,12 +2665,11 @@ EXPORT_SYMBOL(console_lock);
  */
 int console_trylock(void)
 {
-       if (down_trylock_console_sem())
+       /* On panic, the console_lock must be left to the panic cpu. */
+       if (other_cpu_in_panic())
                return 0;
-       if (console_suspended) {
-               up_console_sem();
+       if (down_trylock_console_sem())
                return 0;
-       }
        console_locked = 1;
        console_may_schedule = 0;
        return 1;
@@ -2634,25 +2683,6 @@ int is_console_locked(void)
 EXPORT_SYMBOL(is_console_locked);
 
 /*
- * Return true when this CPU should unlock console_sem without pushing all
- * messages to the console. This reduces the chance that the console is
- * locked when the panic CPU tries to use it.
- */
-static bool abandon_console_lock_in_panic(void)
-{
-       if (!panic_in_progress())
-               return false;
-
-       /*
-        * We can use raw_smp_processor_id() here because it is impossible for
-        * the task to be migrated to the panic_cpu, or away from it. If
-        * panic_cpu has already been set, and we're not currently executing on
-        * that CPU, then we never will be.
-        */
-       return atomic_read(&panic_cpu) != raw_smp_processor_id();
-}
-
-/*
  * Check if the given console is currently capable and allowed to print
  * records.
  *
@@ -2665,6 +2695,9 @@ static inline bool console_is_usable(struct console *con)
        if (!(flags & CON_ENABLED))
                return false;
 
+       if ((flags & CON_SUSPENDED))
+               return false;
+
        if (!con->write)
                return false;
 
@@ -2948,7 +2981,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
                        any_progress = true;
 
                        /* Allow panic_cpu to take over the consoles safely. */
-                       if (abandon_console_lock_in_panic())
+                       if (other_cpu_in_panic())
                                goto abandon;
 
                        if (do_cond_resched)
@@ -2983,11 +3016,6 @@ void console_unlock(void)
        bool flushed;
        u64 next_seq;
 
-       if (console_suspended) {
-               up_console_sem();
-               return;
-       }
-
        /*
         * Console drivers are called with interrupts disabled, so
         * @console_may_schedule should be cleared before; however, we may
@@ -3045,10 +3073,28 @@ EXPORT_SYMBOL(console_conditional_schedule);
 
 void console_unblank(void)
 {
+       bool found_unblank = false;
        struct console *c;
        int cookie;
 
        /*
+        * First check if there are any consoles implementing the unblank()
+        * callback. If not, there is no reason to continue and take the
+        * console lock, which in particular can be dangerous if
+        * @oops_in_progress is set.
+        */
+       cookie = console_srcu_read_lock();
+       for_each_console_srcu(c) {
+               if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) {
+                       found_unblank = true;
+                       break;
+               }
+       }
+       console_srcu_read_unlock(cookie);
+       if (!found_unblank)
+               return;
+
+       /*
         * Stop console printing because the unblank() callback may
         * assume the console is not within its write() callback.
         *
@@ -3056,6 +3102,16 @@ void console_unblank(void)
         * In that case, attempt a trylock as best-effort.
         */
        if (oops_in_progress) {
+               /* Semaphores are not NMI-safe. */
+               if (in_nmi())
+                       return;
+
+               /*
+                * Attempting to trylock the console lock can deadlock
+                * if another CPU was stopped while modifying the
+                * semaphore. "Hope and pray" that this is not the
+                * current situation.
+                */
                if (down_trylock_console_sem() != 0)
                        return;
        } else
@@ -3085,14 +3141,24 @@ void console_unblank(void)
  */
 void console_flush_on_panic(enum con_flush_mode mode)
 {
+       bool handover;
+       u64 next_seq;
+
        /*
-        * If someone else is holding the console lock, trylock will fail
-        * and may_schedule may be set.  Ignore and proceed to unlock so
-        * that messages are flushed out.  As this can be called from any
-        * context and we don't want to get preempted while flushing,
-        * ensure may_schedule is cleared.
+        * Ignore the console lock and flush out the messages. Attempting a
+        * trylock would not be useful because:
+        *
+        *   - if it is contended, it must be ignored anyway
+        *   - console_lock() and console_trylock() block and fail
+        *     respectively in panic for non-panic CPUs
+        *   - semaphores are not NMI-safe
+        */
+
+       /*
+        * If another context is holding the console lock,
+        * @console_may_schedule might be set. Clear it so that
+        * this context does not call cond_resched() while flushing.
         */
-       console_trylock();
        console_may_schedule = 0;
 
        if (mode == CONSOLE_REPLAY_ALL) {
@@ -3105,15 +3171,15 @@ void console_flush_on_panic(enum con_flush_mode mode)
                cookie = console_srcu_read_lock();
                for_each_console_srcu(c) {
                        /*
-                        * If the above console_trylock() failed, this is an
-                        * unsynchronized assignment. But in that case, the
+                        * This is an unsynchronized assignment, but the
                         * kernel is in "hope and pray" mode anyway.
                         */
                        c->seq = seq;
                }
                console_srcu_read_unlock(cookie);
        }
-       console_unlock();
+
+       console_flush_all(false, &next_seq, &handover);
 }
 
 /*
@@ -3679,8 +3745,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
 
                /*
                 * Hold the console_lock to guarantee safe access to
-                * console->seq and to prevent changes to @console_suspended
-                * until all consoles have been processed.
+                * console->seq.
                 */
                console_lock();
 
@@ -3688,6 +3753,11 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
                for_each_console_srcu(c) {
                        if (con && con != c)
                                continue;
+                       /*
+                        * If consoles are not usable, it cannot be expected
+                        * that they make forward progress, so only increment
+                        * @diff for usable consoles.
+                        */
                        if (!console_is_usable(c))
                                continue;
                        printk_seq = c->seq;
@@ -3696,18 +3766,12 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
                }
                console_srcu_read_unlock(cookie);
 
-               /*
-                * If consoles are suspended, it cannot be expected that they
-                * make forward progress, so timeout immediately. @diff is
-                * still used to return a valid flush status.
-                */
-               if (console_suspended)
-                       remaining = 0;
-               else if (diff != last_diff && reset_on_progress)
+               if (diff != last_diff && reset_on_progress)
                        remaining = timeout_ms;
 
                console_unlock();
 
+               /* Note: @diff is 0 if there are no usable consoles. */
                if (diff == 0 || remaining == 0)
                        break;
 
@@ -3741,7 +3805,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
  * printer has been seen to make some forward progress.
  *
  * Context: Process context. May sleep while acquiring console lock.
- * Return: true if all enabled printers are caught up.
+ * Return: true if all usable printers are caught up.
  */
 static bool pr_flush(int timeout_ms, bool reset_on_progress)
 {
@@ -3798,11 +3862,33 @@ static void __wake_up_klogd(int val)
        preempt_enable();
 }
 
+/**
+ * wake_up_klogd - Wake kernel logging daemon
+ *
+ * Use this function when new records have been added to the ringbuffer
+ * and the console printing of those records has already occurred or is
+ * known to be handled by some other context. This function will only
+ * wake the logging daemon.
+ *
+ * Context: Any context.
+ */
 void wake_up_klogd(void)
 {
        __wake_up_klogd(PRINTK_PENDING_WAKEUP);
 }
 
+/**
+ * defer_console_output - Wake kernel logging daemon and trigger
+ *     console printing in a deferred context
+ *
+ * Use this function when new records have been added to the ringbuffer,
+ * this context is responsible for console printing those records, but
+ * the current context is not allowed to perform the console printing.
+ * Trigger an irq_work context to perform the console printing. This
+ * function also wakes the logging daemon.
+ *
+ * Context: Any context.
+ */
 void defer_console_output(void)
 {
        /*
@@ -3819,12 +3905,7 @@ void printk_trigger_flush(void)
 
 int vprintk_deferred(const char *fmt, va_list args)
 {
-       int r;
-
-       r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
-       defer_console_output();
-
-       return r;
+       return vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
 }
 
 int _printk_deferred(const char *fmt, ...)
index 2dc4d5a..fde3386 100644 (file)
@@ -1735,7 +1735,7 @@ static bool copy_data(struct prb_data_ring *data_ring,
        if (!buf || !buf_size)
                return true;
 
-       data_size = min_t(u16, buf_size, len);
+       data_size = min_t(unsigned int, buf_size, len);
 
        memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */
        return true;
index ef0f9a2..6d10927 100644 (file)
@@ -38,13 +38,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
         * Use the main logbuf even in NMI. But avoid calling console
         * drivers that might have their own locks.
         */
-       if (this_cpu_read(printk_context) || in_nmi()) {
-               int len;
-
-               len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
-               defer_console_output();
-               return len;
-       }
+       if (this_cpu_read(printk_context) || in_nmi())
+               return vprintk_deferred(fmt, args);
 
        /* No obstacles. */
        return vprintk_default(fmt, args);
index 319cfbe..fa307f9 100644 (file)
@@ -2237,6 +2237,17 @@ config TEST_DIV64
 
          If unsure, say N.
 
+config TEST_IOV_ITER
+       tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS
+       depends on KUNIT
+       default KUNIT_ALL_TESTS
+       help
+         Enable this to turn on testing of the operation of the I/O iterator
+         (iov_iter). This test is executed only once during system boot (so
+         affects only boot time), or at module load time.
+
+         If unsure, say N.
+
 config KPROBES_SANITY_TEST
        tristate "Kprobes sanity tests" if !KUNIT_ALL_TESTS
        depends on DEBUG_KERNEL
index 2e08397..740109b 100644 (file)
@@ -64,6 +64,7 @@ obj-$(CONFIG_TEST_BITOPS) += test_bitops.o
 CFLAGS_test_bitops.o += -Werror
 obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
+obj-$(CONFIG_TEST_IOV_ITER) += kunit_iov_iter.o
 obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
 obj-$(CONFIG_TEST_IDA) += test_ida.o
 obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
index 7ecdfdb..13f2758 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(idr_alloc);
  * @end: The maximum ID (exclusive).
  * @gfp: Memory allocation flags.
  *
- * Allocates an unused ID in the range specified by @nextid and @end.  If
+ * Allocates an unused ID in the range specified by @start and @end.  If
  * @end is <= 0, it is treated as one larger than %INT_MAX.  This allows
  * callers to use @start + N as @end as long as N is within integer range.
  * The search for an unused ID will start at the last ID allocated and will
index b31597b..27234a8 100644 (file)
@@ -1654,14 +1654,14 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
                                           size_t *offset0)
 {
        struct page **p, *page;
-       size_t skip = i->iov_offset, offset;
+       size_t skip = i->iov_offset, offset, size;
        int k;
 
        for (;;) {
                if (i->nr_segs == 0)
                        return 0;
-               maxsize = min(maxsize, i->bvec->bv_len - skip);
-               if (maxsize)
+               size = min(maxsize, i->bvec->bv_len - skip);
+               if (size)
                        break;
                i->iov_offset = 0;
                i->nr_segs--;
@@ -1674,16 +1674,16 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
        offset = skip % PAGE_SIZE;
        *offset0 = offset;
 
-       maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+       maxpages = want_pages_array(pages, size, offset, maxpages);
        if (!maxpages)
                return -ENOMEM;
        p = *pages;
        for (k = 0; k < maxpages; k++)
                p[k] = page + k;
 
-       maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset);
-       iov_iter_advance(i, maxsize);
-       return maxsize;
+       size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
+       iov_iter_advance(i, size);
+       return size;
 }
 
 /*
@@ -1698,14 +1698,14 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
 {
        struct page **p, *page;
        const void *kaddr;
-       size_t skip = i->iov_offset, offset, len;
+       size_t skip = i->iov_offset, offset, len, size;
        int k;
 
        for (;;) {
                if (i->nr_segs == 0)
                        return 0;
-               maxsize = min(maxsize, i->kvec->iov_len - skip);
-               if (maxsize)
+               size = min(maxsize, i->kvec->iov_len - skip);
+               if (size)
                        break;
                i->iov_offset = 0;
                i->nr_segs--;
@@ -1717,13 +1717,13 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
        offset = (unsigned long)kaddr & ~PAGE_MASK;
        *offset0 = offset;
 
-       maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+       maxpages = want_pages_array(pages, size, offset, maxpages);
        if (!maxpages)
                return -ENOMEM;
        p = *pages;
 
        kaddr -= offset;
-       len = offset + maxsize;
+       len = offset + size;
        for (k = 0; k < maxpages; k++) {
                size_t seg = min_t(size_t, len, PAGE_SIZE);
 
@@ -1737,9 +1737,9 @@ static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
                kaddr += PAGE_SIZE;
        }
 
-       maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset);
-       iov_iter_advance(i, maxsize);
-       return maxsize;
+       size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
+       iov_iter_advance(i, size);
+       return size;
 }
 
 /*
diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c
new file mode 100644 (file)
index 0000000..859b67c
--- /dev/null
@@ -0,0 +1,777 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* I/O iterator tests.  This can only test kernel-backed iterator types.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/uio.h>
+#include <linux/bvec.h>
+#include <kunit/test.h>
+
+MODULE_DESCRIPTION("iov_iter testing");
+MODULE_AUTHOR("David Howells <dhowells@redhat.com>");
+MODULE_LICENSE("GPL");
+
+struct kvec_test_range {
+       int     from, to;
+};
+
+static const struct kvec_test_range kvec_test_ranges[] = {
+       { 0x00002, 0x00002 },
+       { 0x00027, 0x03000 },
+       { 0x05193, 0x18794 },
+       { 0x20000, 0x20000 },
+       { 0x20000, 0x24000 },
+       { 0x24000, 0x27001 },
+       { 0x29000, 0xffffb },
+       { 0xffffd, 0xffffe },
+       { -1 }
+};
+
+/* Fill byte used for position @x: the low eight bits of @x. */
+static inline u8 pattern(unsigned long x)
+{
+       return (u8)x;
+}
+
+/* Deferred kunit action: release a buffer from iov_kunit_create_buffer(). */
+static void iov_kunit_unmap(void *data)
+{
+       /*
+        * The mapping is created with VM_MAP_PUT_PAGES, so it must be
+        * released with vfree() for the pages and the page array to be
+        * dropped. vunmap() would only tear down the mapping and leak both.
+        */
+       vfree(data);
+}
+
+/*
+ * Allocate @npages pages, map them into one virtually contiguous buffer and
+ * return that mapping; the page array is passed back through @ppages.
+ *
+ * Any failure aborts the test through a KUNIT_ASSERT_*() after releasing
+ * what was allocated so far. On success a deferred action is registered
+ * that frees the mapping, the pages and the page array together.
+ */
+static void *__init iov_kunit_create_buffer(struct kunit *test,
+                                           struct page ***ppages,
+                                           size_t npages)
+{
+       struct page **pages;
+       unsigned long got;
+       void *buffer;
+
+       /*
+        * Deliberately not kunit-managed: VM_MAP_PUT_PAGES transfers
+        * ownership of the array to the mapping, which frees it in vfree().
+        */
+       pages = kcalloc(npages, sizeof(struct page *), GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages);
+       *ppages = pages;
+
+       got = alloc_pages_bulk_array(GFP_KERNEL, npages, pages);
+       if (got != npages) {
+               release_pages(pages, got);
+               kfree(pages);
+               KUNIT_ASSERT_EQ(test, got, npages);
+       }
+
+       buffer = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL);
+       if (!buffer) {
+               /* Ownership was not transferred; clean up by hand. */
+               release_pages(pages, got);
+               kfree(pages);
+       }
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer);
+
+       kunit_add_action_or_reset(test, iov_kunit_unmap, buffer);
+       return buffer;
+}
+
+/*
+ * Populate @kvec from the range table @pr and point @iter at the result.
+ *
+ * Each table entry describes the bytes [from, to) of @buffer. Loading stops
+ * at the first entry with a negative @from or once @kvmax slots are used.
+ * The test is aborted if an entry is reversed, ends beyond @bufsize, or if
+ * the combined length exceeds @bufsize.
+ */
+static void __init iov_kunit_load_kvec(struct kunit *test,
+                                      struct iov_iter *iter, int dir,
+                                      struct kvec *kvec, unsigned int kvmax,
+                                      void *buffer, size_t bufsize,
+                                      const struct kvec_test_range *pr)
+{
+       size_t total = 0;
+       int nr;
+
+       for (nr = 0; nr < kvmax && pr->from >= 0; nr++, pr++) {
+               KUNIT_ASSERT_GE(test, pr->to, pr->from);
+               KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+               kvec[nr].iov_base = buffer + pr->from;
+               kvec[nr].iov_len = pr->to - pr->from;
+               total += kvec[nr].iov_len;
+       }
+       KUNIT_ASSERT_LE(test, total, bufsize);
+
+       iov_iter_kvec(iter, dir, kvec, nr, total);
+}
+
+/*
+ * Test copying to an ITER_KVEC-type iterator.
+ */
+static void __init iov_kunit_copy_to_kvec(struct kunit *test)
+{
+       const struct kvec_test_range *pr;
+       struct iov_iter iter;
+       struct page **spages, **bpages;
+       struct kvec kvec[8];
+       u8 *scratch, *buffer;
+       size_t bufsize, npages, size, copied;
+       int i, patt;
+
+       bufsize = 0x100000;
+       npages = bufsize / PAGE_SIZE;
+
+       scratch = iov_kunit_create_buffer(test, &spages, npages);
+       for (i = 0; i < bufsize; i++)
+               scratch[i] = pattern(i);
+
+       buffer = iov_kunit_create_buffer(test, &bpages, npages);
+       memset(buffer, 0, bufsize);
+
+       iov_kunit_load_kvec(test, &iter, READ, kvec, ARRAY_SIZE(kvec),
+                           buffer, bufsize, kvec_test_ranges);
+       size = iter.count;
+
+       copied = copy_to_iter(scratch, size, &iter);
+
+       KUNIT_EXPECT_EQ(test, copied, size);
+       KUNIT_EXPECT_EQ(test, iter.count, 0);
+       KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);
+
+       /* Build the expected image in the scratch buffer. */
+       patt = 0;
+       memset(scratch, 0, bufsize);
+       for (pr = kvec_test_ranges; pr->from >= 0; pr++)
+               for (i = pr->from; i < pr->to; i++)
+                       scratch[i] = pattern(patt++);
+
+       /* Compare the images */
+       for (i = 0; i < bufsize; i++) {
+               KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
+               if (buffer[i] != scratch[i])
+                       return;
+       }
+
+       KUNIT_SUCCEED();
+}
+
+/*
+ * Test copying from an ITER_KVEC-type iterator.
+ */
+static void __init iov_kunit_copy_from_kvec(struct kunit *test)
+{
+       const struct kvec_test_range *pr;
+       struct iov_iter iter;
+       struct page **spages, **bpages;
+       struct kvec kvec[8];
+       u8 *scratch, *buffer;
+       size_t bufsize, npages, size, copied;
+       int i, j;
+
+       bufsize = 0x100000;
+       npages = bufsize / PAGE_SIZE;
+
+       buffer = iov_kunit_create_buffer(test, &bpages, npages);
+       for (i = 0; i < bufsize; i++)
+               buffer[i] = pattern(i);
+
+       scratch = iov_kunit_create_buffer(test, &spages, npages);
+       memset(scratch, 0, bufsize);
+
+       iov_kunit_load_kvec(test, &iter, WRITE, kvec, ARRAY_SIZE(kvec),
+                           buffer, bufsize, kvec_test_ranges);
+       size = min(iter.count, bufsize);
+
+       copied = copy_from_iter(scratch, size, &iter);
+
+       KUNIT_EXPECT_EQ(test, copied, size);
+       KUNIT_EXPECT_EQ(test, iter.count, 0);
+       KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);
+
+       /* Build the expected image in the main buffer. */
+       i = 0;
+       memset(buffer, 0, bufsize);
+       for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+               for (j = pr->from; j < pr->to; j++) {
+                       buffer[i++] = pattern(j);
+                       if (i >= bufsize)
+                               goto stop;
+               }
+       }
+stop:
+
+       /* Compare the images */
+       for (i = 0; i < bufsize; i++) {
+               KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
+               if (scratch[i] != buffer[i])
+                       return;
+       }
+
+       KUNIT_SUCCEED();
+}
+
+struct bvec_test_range {
+       int     page, from, to;
+};
+
+static const struct bvec_test_range bvec_test_ranges[] = {
+       { 0, 0x0002, 0x0002 },
+       { 1, 0x0027, 0x0893 },
+       { 2, 0x0193, 0x0794 },
+       { 3, 0x0000, 0x1000 },
+       { 4, 0x0000, 0x1000 },
+       { 5, 0x0000, 0x1000 },
+       { 6, 0x0000, 0x0ffb },
+       { 6, 0x0ffd, 0x0ffe },
+       { -1, -1, -1 }
+};
+
+static void __init iov_kunit_load_bvec(struct kunit *test,
+                                      struct iov_iter *iter, int dir,
+                                      struct bio_vec *bvec, unsigned int bvmax,
+                                      struct page **pages, size_t npages,
+                                      size_t bufsize,
+                                      const struct bvec_test_range *pr)
+{
+       struct page *can_merge = NULL, *page;
+       size_t size = 0;
+       int i;
+
+       for (i = 0; i < bvmax; i++, pr++) {
+               if (pr->from < 0)
+                       break;
+               KUNIT_ASSERT_LT(test, pr->page, npages);
+               KUNIT_ASSERT_LT(test, pr->page * PAGE_SIZE, bufsize);
+               KUNIT_ASSERT_GE(test, pr->from, 0);
+               KUNIT_ASSERT_GE(test, pr->to, pr->from);
+               KUNIT_ASSERT_LE(test, pr->to, PAGE_SIZE);
+
+               page = pages[pr->page];
+               if (pr->from == 0 && pr->from != pr->to && page == can_merge) {
+                       i--;
+                       bvec[i].bv_len += pr->to;
+               } else {
+                       bvec_set_page(&bvec[i], page, pr->to - pr->from, pr->from);
+               }
+
+               size += pr->to - pr->from;
+               if ((pr->to & ~PAGE_MASK) == 0)
+                       can_merge = page + pr->to / PAGE_SIZE;
+               else
+                       can_merge = NULL;
+       }
+
+       iov_iter_bvec(iter, dir, bvec, i, size);
+}
+
+/*
+ * Test copying to an ITER_BVEC-type iterator.
+ */
+static void __init iov_kunit_copy_to_bvec(struct kunit *test)
+{
+       const struct bvec_test_range *pr;
+       struct iov_iter iter;
+       struct bio_vec bvec[8];
+       struct page **spages, **bpages;
+       u8 *scratch, *buffer;
+       size_t bufsize, npages, size, copied;
+       int i, patt;
+
+       bufsize = 0x100000;
+       npages = bufsize / PAGE_SIZE;
+
+       /* Source data: each byte holds the low bits of its own offset. */
+       scratch = iov_kunit_create_buffer(test, &spages, npages);
+       for (i = 0; i < bufsize; i++)
+               scratch[i] = pattern(i);
+
+       /* Destination starts zeroed so bytes outside the ranges stand out. */
+       buffer = iov_kunit_create_buffer(test, &bpages, npages);
+       memset(buffer, 0, bufsize);
+
+       iov_kunit_load_bvec(test, &iter, READ, bvec, ARRAY_SIZE(bvec),
+                           bpages, npages, bufsize, bvec_test_ranges);
+       size = iter.count;
+
+       copied = copy_to_iter(scratch, size, &iter);
+
+       /* The whole iterator must have been consumed. */
+       KUNIT_EXPECT_EQ(test, copied, size);
+       KUNIT_EXPECT_EQ(test, iter.count, 0);
+       KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);
+
+       /*
+        * Build the expected image in the scratch buffer: the source bytes
+        * are consumed sequentially (@patt) and land in each range in turn.
+        */
+       patt = 0;
+       memset(scratch, 0, bufsize);
+       for (pr = bvec_test_ranges; pr->from >= 0; pr++) {
+               u8 *p = scratch + pr->page * PAGE_SIZE;
+
+               for (i = pr->from; i < pr->to; i++)
+                       p[i] = pattern(patt++);
+       }
+
+       /* Compare the images, reporting only the first mismatch. */
+       for (i = 0; i < bufsize; i++) {
+               KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
+               if (buffer[i] != scratch[i])
+                       return;
+       }
+
+       KUNIT_SUCCEED();
+}
+
+/*
+ * Test copying from an ITER_BVEC-type iterator.
+ */
+static void __init iov_kunit_copy_from_bvec(struct kunit *test)
+{
+       const struct bvec_test_range *pr;
+       struct iov_iter iter;
+       struct bio_vec bvec[8];
+       struct page **spages, **bpages;
+       u8 *scratch, *buffer;
+       size_t bufsize, npages, size, copied;
+       int i, j;
+
+       bufsize = 0x100000;
+       npages = bufsize / PAGE_SIZE;
+
+       buffer = iov_kunit_create_buffer(test, &bpages, npages);
+       for (i = 0; i < bufsize; i++)
+               buffer[i] = pattern(i);
+
+       scratch = iov_kunit_create_buffer(test, &spages, npages);
+       memset(scratch, 0, bufsize);
+
+       iov_kunit_load_bvec(test, &iter, WRITE, bvec, ARRAY_SIZE(bvec),
+                           bpages, npages, bufsize, bvec_test_ranges);
+       size = iter.count;
+
+       copied = copy_from_iter(scratch, size, &iter);
+
+       KUNIT_EXPECT_EQ(test, copied, size);
+       KUNIT_EXPECT_EQ(test, iter.count, 0);
+       KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);
+
+       /* Build the expected image in the main buffer. */
+       i = 0;
+       memset(buffer, 0, bufsize);
+       for (pr = bvec_test_ranges; pr->from >= 0; pr++) {
+               size_t patt = pr->page * PAGE_SIZE;
+
+               for (j = pr->from; j < pr->to; j++) {
+                       buffer[i++] = pattern(patt + j);
+                       if (i >= bufsize)
+                               goto stop;
+               }
+       }
+stop:
+
+       /* Compare the images */
+       for (i = 0; i < bufsize; i++) {
+               KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
+               if (scratch[i] != buffer[i])
+                       return;
+       }
+
+       KUNIT_SUCCEED();
+}
+
+/* Deferred kunit action: empty the xarray passed as @data and free it. */
+static void iov_kunit_destroy_xarray(void *data)
+{
+       xa_destroy(data);
+       kfree(data);
+}
+
+/*
+ * Store @pages[0..@npages) in @xarray at indices 0..@npages-1 and set up
+ * @iter over the whole stored span, starting at position 0. The test is
+ * aborted if any store reports an error.
+ */
+static void __init iov_kunit_load_xarray(struct kunit *test,
+                                        struct iov_iter *iter, int dir,
+                                        struct xarray *xarray,
+                                        struct page **pages, size_t npages)
+{
+       size_t total = 0;
+       int idx = 0;
+
+       while (idx < npages) {
+               void *old = xa_store(xarray, idx, pages[idx], GFP_KERNEL);
+
+               KUNIT_ASSERT_FALSE(test, xa_is_err(old));
+               total += PAGE_SIZE;
+               idx++;
+       }
+       iov_iter_xarray(iter, dir, xarray, 0, total);
+}
+
+/*
+ * Allocate and initialise an empty xarray for a test. Its teardown is
+ * registered as a deferred kunit action. The test is aborted if the
+ * allocation fails.
+ */
+static struct xarray *iov_kunit_create_xarray(struct kunit *test)
+{
+       struct xarray *xarray;
+
+       xarray = kzalloc(sizeof(struct xarray), GFP_KERNEL);
+       /* Validate the allocation before xa_init() dereferences it. */
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xarray);
+       xa_init(xarray);
+       kunit_add_action_or_reset(test, iov_kunit_destroy_xarray, xarray);
+       return xarray;
+}
+
+/*
+ * Test copying to an ITER_XARRAY-type iterator.
+ */
+static void __init iov_kunit_copy_to_xarray(struct kunit *test)
+{
+       const struct kvec_test_range *pr;
+       struct iov_iter iter;
+       struct xarray *xarray;
+       struct page **spages, **bpages;
+       u8 *scratch, *buffer;
+       size_t bufsize, npages, size, copied;
+       int i, patt;
+
+       bufsize = 0x100000;
+       npages = bufsize / PAGE_SIZE;
+
+       xarray = iov_kunit_create_xarray(test);
+
+       scratch = iov_kunit_create_buffer(test, &spages, npages);
+       for (i = 0; i < bufsize; i++)
+               scratch[i] = pattern(i);
+
+       buffer = iov_kunit_create_buffer(test, &bpages, npages);
+       memset(buffer, 0, bufsize);
+
+       iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages);
+
+       i = 0;
+       for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+               size = pr->to - pr->from;
+               KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+               iov_iter_xarray(&iter, READ, xarray, pr->from, size);
+               copied = copy_to_iter(scratch + i, size, &iter);
+
+               KUNIT_EXPECT_EQ(test, copied, size);
+               KUNIT_EXPECT_EQ(test, iter.count, 0);
+               KUNIT_EXPECT_EQ(test, iter.iov_offset, size);
+               i += size;
+       }
+
+       /* Build the expected image in the scratch buffer. */
+       patt = 0;
+       memset(scratch, 0, bufsize);
+       for (pr = kvec_test_ranges; pr->from >= 0; pr++)
+               for (i = pr->from; i < pr->to; i++)
+                       scratch[i] = pattern(patt++);
+
+       /* Compare the images */
+       for (i = 0; i < bufsize; i++) {
+               KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
+               if (buffer[i] != scratch[i])
+                       return;
+       }
+
+       KUNIT_SUCCEED();
+}
+
+/*
+ * Test copying from an ITER_XARRAY-type iterator.
+ */
+static void __init iov_kunit_copy_from_xarray(struct kunit *test)
+{
+       const struct kvec_test_range *pr;
+       struct iov_iter iter;
+       struct xarray *xarray;
+       struct page **spages, **bpages;
+       u8 *scratch, *buffer;
+       size_t bufsize, npages, size, copied;
+       int i, j;
+
+       bufsize = 0x100000;
+       npages = bufsize / PAGE_SIZE;
+
+       xarray = iov_kunit_create_xarray(test);
+
+       buffer = iov_kunit_create_buffer(test, &bpages, npages);
+       for (i = 0; i < bufsize; i++)
+               buffer[i] = pattern(i);
+
+       scratch = iov_kunit_create_buffer(test, &spages, npages);
+       memset(scratch, 0, bufsize);
+
+       iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages);
+
+       i = 0;
+       for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+               size = pr->to - pr->from;
+               KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+               iov_iter_xarray(&iter, WRITE, xarray, pr->from, size);
+               copied = copy_from_iter(scratch + i, size, &iter);
+
+               KUNIT_EXPECT_EQ(test, copied, size);
+               KUNIT_EXPECT_EQ(test, iter.count, 0);
+               KUNIT_EXPECT_EQ(test, iter.iov_offset, size);
+               i += size;
+       }
+
+       /* Build the expected image in the main buffer. */
+       i = 0;
+       memset(buffer, 0, bufsize);
+       for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
+               for (j = pr->from; j < pr->to; j++) {
+                       buffer[i++] = pattern(j);
+                       if (i >= bufsize)
+                               goto stop;
+               }
+       }
+stop:
+
+       /* Compare the images */
+       for (i = 0; i < bufsize; i++) {
+               KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
+               if (scratch[i] != buffer[i])
+                       return;
+       }
+
+       KUNIT_SUCCEED();
+}
+
+/*
+ * Test the extraction of ITER_KVEC-type iterators.
+ *
+ * A kvec iterator is loaded from kvec_test_ranges and then drained with
+ * repeated iov_iter_extract_pages() calls, each passed 100 * 1024 and
+ * ARRAY_SIZE(pagelist) as its limits.  After every call the returned length
+ * is checked against the iterator's remaining count, and the page slots
+ * covered by that length are compared with the bpages[] entries found by
+ * walking the same ranges in parallel.
+ */
+static void __init iov_kunit_extract_pages_kvec(struct kunit *test)
+{
+       const struct kvec_test_range *pr;
+       struct iov_iter iter;
+       struct page **bpages, *pagelist[8], **pages = pagelist;
+       struct kvec kvec[8];
+       u8 *buffer;
+       ssize_t len;
+       size_t bufsize, size = 0, npages;
+       int i, from;
+
+       bufsize = 0x100000; /* 1 MiB */
+       npages = bufsize / PAGE_SIZE;
+
+       buffer = iov_kunit_create_buffer(test, &bpages, npages);
+
+       iov_kunit_load_kvec(test, &iter, READ, kvec, ARRAY_SIZE(kvec),
+                           buffer, bufsize, kvec_test_ranges);
+       size = iter.count; /* bytes still expected from the iterator */
+
+       pr = kvec_test_ranges;
+       from = pr->from; /* current position inside the current range */
+       do {
+               size_t offset0 = LONG_MAX; /* checked below to have become < PAGE_SIZE */
+
+               for (i = 0; i < ARRAY_SIZE(pagelist); i++) /* fill every slot with a bogus pointer */
+                       pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
+
+               len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
+                                            ARRAY_SIZE(pagelist), 0, &offset0);
+               KUNIT_EXPECT_GE(test, len, 0);
+               if (len < 0)
+                       break;
+               KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
+               KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
+               KUNIT_EXPECT_LE(test, len, size);
+               KUNIT_EXPECT_EQ(test, iter.count, size - len);
+               size -= len;
+
+               if (len == 0)
+                       break;
+
+               for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
+                       struct page *p;
+                       ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0); /* share of len in this page */
+                       int ix;
+
+                       KUNIT_ASSERT_GE(test, part, 0);
+                       while (from == pr->to) { /* range used up: move to the next one */
+                               pr++;
+                               from = pr->from;
+                               if (from < 0) /* end of the range table */
+                                       goto stop;
+                       }
+                       ix = from / PAGE_SIZE;
+                       KUNIT_ASSERT_LT(test, ix, npages);
+                       p = bpages[ix];
+                       KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
+                       KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);
+                       from += part;
+                       len -= part;
+                       KUNIT_ASSERT_GE(test, len, 0);
+                       if (len == 0)
+                               break;
+                       offset0 = 0; /* later pages of this batch are checked from offset 0 */
+               }
+
+               if (test->status == KUNIT_FAILURE) /* stop once any expectation has failed */
+                       break;
+       } while (iov_iter_count(&iter) > 0);
+
+stop:
+       KUNIT_EXPECT_EQ(test, size, 0);
+       KUNIT_EXPECT_EQ(test, iter.count, 0);
+       KUNIT_SUCCEED();
+}
+
+/*
+ * Test the extraction of ITER_BVEC-type iterators.
+ *
+ * Same procedure as the kvec extraction test, but the iterator is loaded
+ * from bvec_test_ranges with iov_kunit_load_bvec(), and the expected page
+ * index is the range's page field plus the page number of the current
+ * position.
+ */
+static void __init iov_kunit_extract_pages_bvec(struct kunit *test)
+{
+       const struct bvec_test_range *pr;
+       struct iov_iter iter;
+       struct page **bpages, *pagelist[8], **pages = pagelist;
+       struct bio_vec bvec[8];
+       ssize_t len;
+       size_t bufsize, size = 0, npages;
+       int i, from;
+
+       bufsize = 0x100000; /* 1 MiB */
+       npages = bufsize / PAGE_SIZE;
+
+       iov_kunit_create_buffer(test, &bpages, npages); /* only the page array is used here */
+       iov_kunit_load_bvec(test, &iter, READ, bvec, ARRAY_SIZE(bvec),
+                           bpages, npages, bufsize, bvec_test_ranges);
+       size = iter.count; /* bytes still expected from the iterator */
+
+       pr = bvec_test_ranges;
+       from = pr->from; /* current position inside the current range */
+       do {
+               size_t offset0 = LONG_MAX; /* checked below to have become < PAGE_SIZE */
+
+               for (i = 0; i < ARRAY_SIZE(pagelist); i++) /* fill every slot with a bogus pointer */
+                       pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
+
+               len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
+                                            ARRAY_SIZE(pagelist), 0, &offset0);
+               KUNIT_EXPECT_GE(test, len, 0);
+               if (len < 0)
+                       break;
+               KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
+               KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
+               KUNIT_EXPECT_LE(test, len, size);
+               KUNIT_EXPECT_EQ(test, iter.count, size - len);
+               size -= len;
+
+               if (len == 0)
+                       break;
+
+               for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
+                       struct page *p;
+                       ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0); /* share of len in this page */
+                       int ix;
+
+                       KUNIT_ASSERT_GE(test, part, 0);
+                       while (from == pr->to) { /* range used up: move to the next one */
+                               pr++;
+                               from = pr->from;
+                               if (from < 0) /* end of the range table */
+                                       goto stop;
+                       }
+                       ix = pr->page + from / PAGE_SIZE; /* range's page field plus page number of from */
+                       KUNIT_ASSERT_LT(test, ix, npages);
+                       p = bpages[ix];
+                       KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
+                       KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);
+                       from += part;
+                       len -= part;
+                       KUNIT_ASSERT_GE(test, len, 0);
+                       if (len == 0)
+                               break;
+                       offset0 = 0; /* later pages of this batch are checked from offset 0 */
+               }
+
+               if (test->status == KUNIT_FAILURE) /* stop once any expectation has failed */
+                       break;
+       } while (iov_iter_count(&iter) > 0);
+
+stop:
+       KUNIT_EXPECT_EQ(test, size, 0);
+       KUNIT_EXPECT_EQ(test, iter.count, 0);
+       KUNIT_SUCCEED();
+}
+
+/*
+ * Test the extraction of ITER_XARRAY-type iterators.
+ *
+ * Unlike the kvec and bvec extraction tests, a fresh xarray iterator is set
+ * up for each entry of kvec_test_ranges and drained on its own, so the
+ * end-of-range expectations (size, iter.count, iter.iov_offset) are checked
+ * once per range.
+ */
+static void __init iov_kunit_extract_pages_xarray(struct kunit *test)
+{
+       const struct kvec_test_range *pr;
+       struct iov_iter iter;
+       struct xarray *xarray;
+       struct page **bpages, *pagelist[8], **pages = pagelist;
+       ssize_t len;
+       size_t bufsize, size = 0, npages;
+       int i, from;
+
+       bufsize = 0x100000; /* 1 MiB */
+       npages = bufsize / PAGE_SIZE;
+
+       xarray = iov_kunit_create_xarray(test);
+
+       iov_kunit_create_buffer(test, &bpages, npages); /* only the page array is used here */
+       iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages);
+
+       for (pr = kvec_test_ranges; pr->from >= 0; pr++) { /* a negative from ends the table */
+               from = pr->from;
+               size = pr->to - from; /* bytes still expected from this range */
+               KUNIT_ASSERT_LE(test, pr->to, bufsize);
+
+               iov_iter_xarray(&iter, WRITE, xarray, from, size);
+
+               do {
+                       size_t offset0 = LONG_MAX; /* checked below to have become < PAGE_SIZE */
+
+                       for (i = 0; i < ARRAY_SIZE(pagelist); i++) /* fill every slot with a bogus pointer */
+                               pagelist[i] = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
+
+                       len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
+                                                    ARRAY_SIZE(pagelist), 0, &offset0);
+                       KUNIT_EXPECT_GE(test, len, 0);
+                       if (len < 0)
+                               break;
+                       KUNIT_EXPECT_LE(test, len, size);
+                       KUNIT_EXPECT_EQ(test, iter.count, size - len);
+                       if (len == 0)
+                               break;
+                       size -= len;
+                       KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
+                       KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
+
+                       for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
+                               struct page *p;
+                               ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0); /* share of len in this page */
+                               int ix;
+
+                               KUNIT_ASSERT_GE(test, part, 0);
+                               ix = from / PAGE_SIZE;
+                               KUNIT_ASSERT_LT(test, ix, npages);
+                               p = bpages[ix];
+                               KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
+                               KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);
+                               from += part;
+                               len -= part;
+                               KUNIT_ASSERT_GE(test, len, 0);
+                               if (len == 0)
+                                       break;
+                               offset0 = 0; /* later pages of this batch are checked from offset 0 */
+                       }
+
+                       if (test->status == KUNIT_FAILURE) /* abandon the remaining ranges too */
+                               goto stop;
+               } while (iov_iter_count(&iter) > 0);
+
+               KUNIT_EXPECT_EQ(test, size, 0);
+               KUNIT_EXPECT_EQ(test, iter.count, 0);
+               KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to - pr->from);
+       }
+
+stop:
+       KUNIT_SUCCEED();
+}
+
+static struct kunit_case __refdata iov_kunit_cases[] = { /* test cases referenced by iov_kunit_suite */
+       KUNIT_CASE(iov_kunit_copy_to_kvec),
+       KUNIT_CASE(iov_kunit_copy_from_kvec),
+       KUNIT_CASE(iov_kunit_copy_to_bvec),
+       KUNIT_CASE(iov_kunit_copy_from_bvec),
+       KUNIT_CASE(iov_kunit_copy_to_xarray),
+       KUNIT_CASE(iov_kunit_copy_from_xarray),
+       KUNIT_CASE(iov_kunit_extract_pages_kvec),
+       KUNIT_CASE(iov_kunit_extract_pages_bvec),
+       KUNIT_CASE(iov_kunit_extract_pages_xarray),
+       {} /* empty last entry; presumably the list terminator KUnit looks for */
+};
+
+static struct kunit_suite iov_kunit_suite = { /* suite named "iov_iter" wrapping iov_kunit_cases[] */
+       .name = "iov_iter",
+       .test_cases = iov_kunit_cases,
+};
+
+kunit_test_suites(&iov_kunit_suite); /* presumably registers the suite with KUnit; macro not in view */
index 45e1761..035b0a4 100644 (file)
@@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
                               vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
+raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
 
 hostprogs      += mktables
 
index a22a05c..0ec534f 100644 (file)
@@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = {
        &raid6_neonx2,
        &raid6_neonx1,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+       &raid6_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       &raid6_lsx,
+#endif
+#endif
 #if defined(__ia64__)
        &raid6_intx32,
        &raid6_intx16,
@@ -104,6 +112,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #if defined(CONFIG_KERNEL_MODE_NEON)
        &raid6_recov_neon,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+       &raid6_recov_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       &raid6_recov_lsx,
+#endif
+#endif
        &raid6_recov_intx1,
        NULL
 };
diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h
new file mode 100644 (file)
index 0000000..acfc33c
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * raid6/loongarch.h
+ *
+ * Definitions common to LoongArch RAID-6 code only
+ *
+ * With __KERNEL__ defined this only pulls in <asm/cpu-features.h> and
+ * <asm/fpu.h>.  Otherwise it supplies stand-ins for user-space testing:
+ * empty kernel_fpu_begin()/kernel_fpu_end() macros and cpu_has_lsx /
+ * cpu_has_lasx tests built on getauxval(AT_HWCAP).
+ */
+
+#ifndef _LIB_RAID6_LOONGARCH_H
+#define _LIB_RAID6_LOONGARCH_H
+
+#ifdef __KERNEL__
+
+#include <asm/cpu-features.h>
+#include <asm/fpu.h>
+
+#else /* for user-space testing */
+
+#include <sys/auxv.h>
+
+/* have to supply these defines for glibc 2.37- and musl */
+#ifndef HWCAP_LOONGARCH_LSX
+#define HWCAP_LOONGARCH_LSX    (1 << 4)
+#endif
+#ifndef HWCAP_LOONGARCH_LASX
+#define HWCAP_LOONGARCH_LASX   (1 << 5)
+#endif
+
+#define kernel_fpu_begin() /* expands to nothing in this branch */
+#define kernel_fpu_end() /* expands to nothing in this branch */
+
+#define cpu_has_lsx    (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX) /* nonzero when the bit is set */
+#define cpu_has_lasx   (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX) /* nonzero when the bit is set */
+
+#endif /* __KERNEL__ */
+
+#endif /* _LIB_RAID6_LOONGARCH_H */
diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c
new file mode 100644 (file)
index 0000000..aa5d9f9
--- /dev/null
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Based on the generic RAID-6 code (int.uc):
+ *
+ * Copyright 2002-2004 H. Peter Anvin
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * The vector algorithms are currently priority 0, which means the generic
+ * scalar algorithms are not being disabled if vector support is present.
+ * This is like the similar LoongArch RAID5 XOR code, with the main reason
+ * repeated here: it cannot be ruled out at this point in time that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+#define NSIZE 16 /* byte distance between the vld/vst accesses of one group below */
+
+static int raid6_has_lsx(void) /* third member of raid6_lsx below */
+{
+       return cpu_has_lsx; /* a getauxval(AT_HWCAP) test in user-space builds, per loongarch.h */
+}
+
+static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * Compute P (into ptrs[disks-2]) and Q (into ptrs[disks-1]) from data
+        * disks 0..disks-3, walking the disks from the highest index down.
+        * Each pass of the outer loop handles NSIZE*4 bytes, so every logical
+        * variable of the pseudo-code in the comments below occupies four
+        * vector registers:
+        *
+        * $vr0, $vr1, $vr2, $vr3: wp
+        * $vr4, $vr5, $vr6, $vr7: wq
+        * $vr8, $vr9, $vr10, $vr11: wd
+        * $vr12, $vr13, $vr14, $vr15: w2
+        * $vr16, $vr17, $vr18, $vr19: w1
+        *
+        * NOTE(review): the loop always touches a full NSIZE*4-byte group, so
+        * bytes is presumably a multiple of NSIZE*4 - confirm against callers.
+        * NOTE(review): the asm statements name the registers directly and
+        * declare no register operands or clobbers, so values are carried
+        * from one statement to the next outside the compiler's knowledge;
+        * statement order must not be changed.
+        */
+       for (d = 0; d < bytes; d += NSIZE*4) {
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+               asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+               asm volatile("vori.b $vr4, $vr0, 0");
+               asm volatile("vori.b $vr5, $vr1, 0");
+               asm volatile("vori.b $vr6, $vr2, 0");
+               asm volatile("vori.b $vr7, $vr3, 0");
+               for (z = z0-1; z >= 0; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+                       asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("vxor.v $vr0, $vr0, $vr8");
+                       asm volatile("vxor.v $vr1, $vr1, $vr9");
+                       asm volatile("vxor.v $vr2, $vr2, $vr10");
+                       asm volatile("vxor.v $vr3, $vr3, $vr11");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("vxor.v $vr16, $vr16, $vr12");
+                       asm volatile("vxor.v $vr17, $vr17, $vr13");
+                       asm volatile("vxor.v $vr18, $vr18, $vr14");
+                       asm volatile("vxor.v $vr19, $vr19, $vr15");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr8");
+                       asm volatile("vxor.v $vr5, $vr17, $vr9");
+                       asm volatile("vxor.v $vr6, $vr18, $vr10");
+                       asm volatile("vxor.v $vr7, $vr19, $vr11");
+               }
+               /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+               asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
+               asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
+               asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
+               asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
+               /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+               asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
+               asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
+               asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
+               asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
+                                  size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * XOR the contribution of data disks start..stop into the existing
+        * P and Q pages (ptrs[disks-2] and ptrs[disks-1]).  Disks above stop
+        * are never read.  Disks below start are not read either; for each
+        * of them only the MASK/SHLBYTE/0x1d step is applied to wq.
+        * Each pass of the outer loop handles NSIZE*4 bytes, four vector
+        * registers per logical variable:
+        *
+        * $vr0, $vr1, $vr2, $vr3: wp
+        * $vr4, $vr5, $vr6, $vr7: wq
+        * $vr8, $vr9, $vr10, $vr11: wd
+        * $vr12, $vr13, $vr14, $vr15: w2
+        * $vr16, $vr17, $vr18, $vr19: w1
+        *
+        * $vr20-$vr27 additionally hold the old P/Q contents during the final
+        * read-modify-write.
+        *
+        * NOTE(review): the loop always touches a full NSIZE*4-byte group, so
+        * bytes is presumably a multiple of NSIZE*4 - confirm against callers.
+        * NOTE(review): the asm statements name the registers directly and
+        * declare no register operands or clobbers, so values are carried
+        * from one statement to the next outside the compiler's knowledge;
+        * statement order must not be changed.
+        */
+       for (d = 0; d < bytes; d += NSIZE*4) {
+               /* P/Q data pages */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+               asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+               asm volatile("vori.b $vr4, $vr0, 0");
+               asm volatile("vori.b $vr5, $vr1, 0");
+               asm volatile("vori.b $vr6, $vr2, 0");
+               asm volatile("vori.b $vr7, $vr3, 0");
+               for (z = z0-1; z >= start; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+                       asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("vxor.v $vr0, $vr0, $vr8");
+                       asm volatile("vxor.v $vr1, $vr1, $vr9");
+                       asm volatile("vxor.v $vr2, $vr2, $vr10");
+                       asm volatile("vxor.v $vr3, $vr3, $vr11");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("vxor.v $vr16, $vr16, $vr12");
+                       asm volatile("vxor.v $vr17, $vr17, $vr13");
+                       asm volatile("vxor.v $vr18, $vr18, $vr14");
+                       asm volatile("vxor.v $vr19, $vr19, $vr15");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr8");
+                       asm volatile("vxor.v $vr5, $vr17, $vr9");
+                       asm volatile("vxor.v $vr6, $vr18, $vr10");
+                       asm volatile("vxor.v $vr7, $vr19, $vr11");
+               }
+
+               /* P/Q left side optimization */
+               for (z = start-1; z >= 0; z--) {
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* wq$$ = w1$$ ^ w2$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr12");
+                       asm volatile("vxor.v $vr5, $vr17, $vr13");
+                       asm volatile("vxor.v $vr6, $vr18, $vr14");
+                       asm volatile("vxor.v $vr7, $vr19, $vr15");
+               }
+               /*
+                * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+                * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+                */
+               asm volatile(
+                       "vld $vr20, %0\n\t"
+                       "vld $vr21, %1\n\t"
+                       "vld $vr22, %2\n\t"
+                       "vld $vr23, %3\n\t"
+                       "vld $vr24, %4\n\t"
+                       "vld $vr25, %5\n\t"
+                       "vld $vr26, %6\n\t"
+                       "vld $vr27, %7\n\t"
+                       "vxor.v $vr20, $vr20, $vr0\n\t"
+                       "vxor.v $vr21, $vr21, $vr1\n\t"
+                       "vxor.v $vr22, $vr22, $vr2\n\t"
+                       "vxor.v $vr23, $vr23, $vr3\n\t"
+                       "vxor.v $vr24, $vr24, $vr4\n\t"
+                       "vxor.v $vr25, $vr25, $vr5\n\t"
+                       "vxor.v $vr26, $vr26, $vr6\n\t"
+                       "vxor.v $vr27, $vr27, $vr7\n\t"
+                       "vst $vr20, %0\n\t"
+                       "vst $vr21, %1\n\t"
+                       "vst $vr22, %2\n\t"
+                       "vst $vr23, %3\n\t"
+                       "vst $vr24, %4\n\t"
+                       "vst $vr25, %5\n\t"
+                       "vst $vr26, %6\n\t"
+                       "vst $vr27, %7\n\t"
+                       : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+                         "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
+                         "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
+                         "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
+               );
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lsx = { /* listed in raid6_algos[] when CONFIG_CPU_HAS_LSX is set */
+       raid6_lsx_gen_syndrome,
+       raid6_lsx_xor_syndrome,
+       raid6_has_lsx, /* returns cpu_has_lsx */
+       "lsx",
+       .priority = 0 /* see the comment near the top of the file for reason */
+};
+
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+#define NSIZE 32 /* byte distance between the xvld/xvst accesses of one group below */
+
+static int raid6_has_lasx(void) /* third member of raid6_lasx below */
+{
+       return cpu_has_lasx; /* a getauxval(AT_HWCAP) test in user-space builds, per loongarch.h */
+}
+
+static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * Compute P (into ptrs[disks-2]) and Q (into ptrs[disks-1]) from data
+        * disks 0..disks-3, walking the disks from the highest index down.
+        * Each pass of the outer loop handles NSIZE*2 bytes, so every logical
+        * variable of the pseudo-code in the comments below occupies two
+        * vector registers:
+        *
+        * $xr0, $xr1: wp
+        * $xr2, $xr3: wq
+        * $xr4, $xr5: wd
+        * $xr6, $xr7: w2
+        * $xr8, $xr9: w1
+        *
+        * NOTE(review): the loop always touches a full NSIZE*2-byte group, so
+        * bytes is presumably a multiple of NSIZE*2 - confirm against callers.
+        * NOTE(review): the asm statements name the registers directly and
+        * declare no register operands or clobbers, so values are carried
+        * from one statement to the next outside the compiler's knowledge;
+        * statement order must not be changed.
+        */
+       for (d = 0; d < bytes; d += NSIZE*2) {
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("xvori.b $xr2, $xr0, 0");
+               asm volatile("xvori.b $xr3, $xr1, 0");
+               for (z = z0-1; z >= 0; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("xvxor.v $xr0, $xr0, $xr4");
+                       asm volatile("xvxor.v $xr1, $xr1, $xr5");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("xvxor.v $xr8, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr9, $xr9, $xr7");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr4");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr5");
+               }
+               /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+               asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
+               asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
+               /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+               asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
+               asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
+                                   size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * XOR the contribution of data disks start..stop into the existing
+        * P and Q pages (ptrs[disks-2] and ptrs[disks-1]).  Disks above stop
+        * are never read.  Disks below start are not read either; for each
+        * of them only the MASK/SHLBYTE/0x1d step is applied to wq.
+        * Each pass of the outer loop handles NSIZE*2 bytes, two vector
+        * registers per logical variable:
+        *
+        * $xr0, $xr1: wp
+        * $xr2, $xr3: wq
+        * $xr4, $xr5: wd
+        * $xr6, $xr7: w2
+        * $xr8, $xr9: w1
+        *
+        * $xr10-$xr13 additionally hold the old P/Q contents during the final
+        * read-modify-write.
+        *
+        * NOTE(review): the loop always touches a full NSIZE*2-byte group, so
+        * bytes is presumably a multiple of NSIZE*2 - confirm against callers.
+        * NOTE(review): the asm statements name the registers directly and
+        * declare no register operands or clobbers, so values are carried
+        * from one statement to the next outside the compiler's knowledge;
+        * statement order must not be changed.
+        */
+       for (d = 0; d < bytes; d += NSIZE*2) {
+               /* P/Q data pages */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("xvori.b $xr2, $xr0, 0");
+               asm volatile("xvori.b $xr3, $xr1, 0");
+               for (z = z0-1; z >= start; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("xvxor.v $xr0, $xr0, $xr4");
+                       asm volatile("xvxor.v $xr1, $xr1, $xr5");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("xvxor.v $xr8, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr9, $xr9, $xr7");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr4");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr5");
+               }
+
+               /* P/Q left side optimization */
+               for (z = start-1; z >= 0; z--) {
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* wq$$ = w1$$ ^ w2$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr7");
+               }
+               /*
+                * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+                * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+                */
+               asm volatile(
+                       "xvld $xr10, %0\n\t"
+                       "xvld $xr11, %1\n\t"
+                       "xvld $xr12, %2\n\t"
+                       "xvld $xr13, %3\n\t"
+                       "xvxor.v $xr10, $xr10, $xr0\n\t"
+                       "xvxor.v $xr11, $xr11, $xr1\n\t"
+                       "xvxor.v $xr12, $xr12, $xr2\n\t"
+                       "xvxor.v $xr13, $xr13, $xr3\n\t"
+                       "xvst $xr10, %0\n\t"
+                       "xvst $xr11, %1\n\t"
+                       "xvst $xr12, %2\n\t"
+                       "xvst $xr13, %3\n\t"
+                       : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+                         "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
+               );
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lasx = { /* listed in raid6_algos[] when CONFIG_CPU_HAS_LASX is set */
+       raid6_lasx_gen_syndrome,
+       raid6_lasx_xor_syndrome,
+       raid6_has_lasx, /* returns cpu_has_lasx */
+       "lasx",
+       .priority = 0 /* see the comment near the top of the file for reason */
+};
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c
new file mode 100644 (file)
index 0000000..94aeac8
--- /dev/null
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Originally based on recov_avx2.c and recov_ssse3.c:
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * Unlike with the syndrome calculation algorithms, there's no boot-time
+ * selection of recovery algorithms by benchmarking, so we have to specify
+ * the priorities and hope the future cores will all have decent vector
+ * support (i.e. no LASX slower than LSX, or even scalar code).
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+static int raid6_has_lsx(void)
+{
+       return cpu_has_lsx;     /* installed as the .valid hook of raid6_recov_lsx; cpu_has_lsx is defined outside this file */
+}
+
+static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
+                                 int failb, void **ptrs)
+{
+       u8 *p, *q, *dp, *dq;
+       const u8 *pbmul;        /* P multiplier table for B data */
+       const u8 *qmul;         /* Q multiplier table (for both) */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Two-data-block recovery, LSX (128-bit vector) version: rebuilds
+        * the pages at ptrs[faila] and ptrs[failb] in place, reading P from
+        * ptrs[disks - 2] and Q from ptrs[disks - 1].
+        *
+        * Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        *
+        * The P and Q slots of ptrs are temporarily pointed at dp and dq,
+        * so gen_syndrome() presumably leaves its results in the two dead
+        * pages rather than in the real P and Q pages (confirm against the
+        * gen_syndrome contract).
+        */
+       dp = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 2] = dp;
+       dq = (u8 *)ptrs[failb];
+       ptrs[failb] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dp;
+       ptrs[failb] = dq;
+       ptrs[disks - 2] = p;
+       ptrs[disks - 1] = q;
+
+       /*
+        * Now, pick the proper data tables
+        *
+        * Each selected raid6_vgfmul[] row is consumed below as two 16-byte
+        * halves: offset 0 is looked up with the low nibble of each input
+        * byte, offset 16 with the high nibble.
+        * failb - faila is used directly as an array index, so callers
+        * presumably guarantee faila < failb -- confirm.
+        */
+       pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+       kernel_fpu_begin();
+
+       /*
+        * vr20, vr21: qmul
+        * vr22, vr23: pbmul
+        *
+        * Loaded once here; inside the loop these four registers are only
+        * ever used as vshuf.b table operands, never as destinations.
+        */
+       asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+       asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+
+       while (bytes) {
+               /*
+                * Each iteration handles 64 bytes (four 16-byte vectors at
+                * offsets 0/16/32/48) of p, q, dp and dq. bytes is a size_t
+                * that is decremented by 64 and the loop only stops when it
+                * is exactly 0, so bytes must be a multiple of 64.
+                *
+                * vr4 - vr7: Q
+                */
+               asm volatile("vld $vr4, %0" : : "m" (q[0]));
+               asm volatile("vld $vr5, %0" : : "m" (q[16]));
+               asm volatile("vld $vr6, %0" : : "m" (q[32]));
+               asm volatile("vld $vr7, %0" : : "m" (q[48]));
+               /*  vr4 - vr7: Q + Qxy (Qxy is loaded from dq into vr8 - vr11 first) */
+               asm volatile("vld $vr8, %0" : : "m" (dq[0]));
+               asm volatile("vld $vr9, %0" : : "m" (dq[16]));
+               asm volatile("vld $vr10, %0" : : "m" (dq[32]));
+               asm volatile("vld $vr11, %0" : : "m" (dq[48]));
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+               /* vr0 - vr3: P */
+               asm volatile("vld $vr0, %0" : : "m" (p[0]));
+               asm volatile("vld $vr1, %0" : : "m" (p[16]));
+               asm volatile("vld $vr2, %0" : : "m" (p[32]));
+               asm volatile("vld $vr3, %0" : : "m" (p[48]));
+               /* vr0 - vr3: P + Pxy (Pxy is loaded from dp, reusing vr8 - vr11 as scratch) */
+               asm volatile("vld $vr8, %0" : : "m" (dp[0]));
+               asm volatile("vld $vr9, %0" : : "m" (dp[16]));
+               asm volatile("vld $vr10, %0" : : "m" (dp[32]));
+               asm volatile("vld $vr11, %0" : : "m" (dp[48]));
+               asm volatile("vxor.v $vr0, $vr0, $vr8");
+               asm volatile("vxor.v $vr1, $vr1, $vr9");
+               asm volatile("vxor.v $vr2, $vr2, $vr10");
+               asm volatile("vxor.v $vr3, $vr3, $vr11");
+
+               /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
+               asm volatile("vsrli.b $vr8, $vr4, 4");
+               asm volatile("vsrli.b $vr9, $vr5, 4");
+               asm volatile("vsrli.b $vr10, $vr6, 4");
+               asm volatile("vsrli.b $vr11, $vr7, 4");
+               /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
+               asm volatile("vandi.b $vr4, $vr4, 0x0f");
+               asm volatile("vandi.b $vr5, $vr5, 0x0f");
+               asm volatile("vandi.b $vr6, $vr6, 0x0f");
+               asm volatile("vandi.b $vr7, $vr7, 0x0f");
+               /* lookup from qmul[0] (vr20 is given as both table operands; the low nibbles are the indices) */
+               asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
+               asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
+               asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
+               asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
+               /* lookup from qmul[16] (same scheme, indexed by the high nibbles) */
+               asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
+               asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
+               asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
+               asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
+               /* vr16 - vr19: B(Q + Qxy), i.e. the XOR of the two nibble lookups */
+               asm volatile("vxor.v $vr16, $vr8, $vr4");
+               asm volatile("vxor.v $vr17, $vr9, $vr5");
+               asm volatile("vxor.v $vr18, $vr10, $vr6");
+               asm volatile("vxor.v $vr19, $vr11, $vr7");
+
+               /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
+               asm volatile("vsrli.b $vr4, $vr0, 4");
+               asm volatile("vsrli.b $vr5, $vr1, 4");
+               asm volatile("vsrli.b $vr6, $vr2, 4");
+               asm volatile("vsrli.b $vr7, $vr3, 4");
+               /*
+                * vr12 - vr15: lower 4 bits of each byte of (P + Pxy)
+                * (written to separate registers: vr0 - vr3 must stay intact
+                * because they are XORed into the second result further down)
+                */
+               asm volatile("vandi.b $vr12, $vr0, 0x0f");
+               asm volatile("vandi.b $vr13, $vr1, 0x0f");
+               asm volatile("vandi.b $vr14, $vr2, 0x0f");
+               asm volatile("vandi.b $vr15, $vr3, 0x0f");
+               /* lookup from pbmul[0] */
+               asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
+               asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
+               asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
+               asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
+               /* lookup from pbmul[16] */
+               asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
+               asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
+               asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
+               asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
+               /* vr4 - vr7: A(P + Pxy) */
+               asm volatile("vxor.v $vr4, $vr4, $vr12");
+               asm volatile("vxor.v $vr5, $vr5, $vr13");
+               asm volatile("vxor.v $vr6, $vr6, $vr14");
+               asm volatile("vxor.v $vr7, $vr7, $vr15");
+
+               /*
+                * vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx
+                * (stored to dq, i.e. the page taken from ptrs[failb])
+                */
+               asm volatile("vxor.v $vr4, $vr4, $vr16");
+               asm volatile("vxor.v $vr5, $vr5, $vr17");
+               asm volatile("vxor.v $vr6, $vr6, $vr18");
+               asm volatile("vxor.v $vr7, $vr7, $vr19");
+               asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+               asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+               asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+               asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+               /*
+                * vr0 - vr3: P + Pxy + Dx = Dy
+                * (stored to dp, i.e. the page taken from ptrs[faila])
+                */
+               asm volatile("vxor.v $vr0, $vr0, $vr4");
+               asm volatile("vxor.v $vr1, $vr1, $vr5");
+               asm volatile("vxor.v $vr2, $vr2, $vr6");
+               asm volatile("vxor.v $vr3, $vr3, $vr7");
+               asm volatile("vst $vr0, %0" : "=m" (dp[0]));
+               asm volatile("vst $vr1, %0" : "=m" (dp[16]));
+               asm volatile("vst $vr2, %0" : "=m" (dp[32]));
+               asm volatile("vst $vr3, %0" : "=m" (dp[48]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dp += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
+                                 void **ptrs)
+{
+       u8 *p, *q, *dq;
+       const u8 *qmul;         /* Q multiplier table */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Data + P recovery, LSX (128-bit vector) version: rebuilds the
+        * data page at ptrs[faila] and the P page at ptrs[disks - 2] in
+        * place, using Q from ptrs[disks - 1].
+        *
+        * Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        *
+        * Only the Q slot of ptrs is redirected (to dq); the P slot still
+        * points at p, so gen_syndrome() presumably rewrites p with a parity
+        * that lacks the missing block -- the "P + Dx" loaded in the loop
+        * below (confirm against the gen_syndrome contract).
+        */
+       dq = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dq;
+       ptrs[disks - 1] = q;
+
+       /*
+        * Now, pick the proper data tables
+        *
+        * The selected raid6_vgfmul[] row is consumed below as two 16-byte
+        * halves: offset 0 is looked up with the low nibble of each input
+        * byte, offset 16 with the high nibble.
+        */
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       kernel_fpu_begin();
+
+       /* vr22, vr23: qmul (loaded once; only read inside the loop) */
+       asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+
+       while (bytes) {
+               /*
+                * Each iteration handles 64 bytes (four 16-byte vectors at
+                * offsets 0/16/32/48) of p, q and dq. bytes is a size_t that
+                * is decremented by 64 and the loop only stops when it is
+                * exactly 0, so bytes must be a multiple of 64.
+                *
+                * vr0 - vr3: P + Dx
+                */
+               asm volatile("vld $vr0, %0" : : "m" (p[0]));
+               asm volatile("vld $vr1, %0" : : "m" (p[16]));
+               asm volatile("vld $vr2, %0" : : "m" (p[32]));
+               asm volatile("vld $vr3, %0" : : "m" (p[48]));
+               /* vr4 - vr7: Qx */
+               asm volatile("vld $vr4, %0" : : "m" (dq[0]));
+               asm volatile("vld $vr5, %0" : : "m" (dq[16]));
+               asm volatile("vld $vr6, %0" : : "m" (dq[32]));
+               asm volatile("vld $vr7, %0" : : "m" (dq[48]));
+               /* vr4 - vr7: Q + Qx (Q is loaded from q into vr8 - vr11 first) */
+               asm volatile("vld $vr8, %0" : : "m" (q[0]));
+               asm volatile("vld $vr9, %0" : : "m" (q[16]));
+               asm volatile("vld $vr10, %0" : : "m" (q[32]));
+               asm volatile("vld $vr11, %0" : : "m" (q[48]));
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+
+               /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
+               asm volatile("vsrli.b $vr8, $vr4, 4");
+               asm volatile("vsrli.b $vr9, $vr5, 4");
+               asm volatile("vsrli.b $vr10, $vr6, 4");
+               asm volatile("vsrli.b $vr11, $vr7, 4");
+               /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
+               asm volatile("vandi.b $vr4, $vr4, 0x0f");
+               asm volatile("vandi.b $vr5, $vr5, 0x0f");
+               asm volatile("vandi.b $vr6, $vr6, 0x0f");
+               asm volatile("vandi.b $vr7, $vr7, 0x0f");
+               /* lookup from qmul[0] (vr22 is given as both table operands; the low nibbles are the indices) */
+               asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
+               asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
+               asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
+               asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
+               /* lookup from qmul[16] (same scheme, indexed by the high nibbles) */
+               asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
+               asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
+               asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
+               asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
+               /* vr4 - vr7: qmul(Q + Qx) = Dx (stored to dq, the page taken from ptrs[faila]) */
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+               asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+               asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+               asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+               asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+               /* vr0 - vr3: P + Dx + Dx = P (written back over the P page) */
+               asm volatile("vxor.v $vr0, $vr0, $vr4");
+               asm volatile("vxor.v $vr1, $vr1, $vr5");
+               asm volatile("vxor.v $vr2, $vr2, $vr6");
+               asm volatile("vxor.v $vr3, $vr3, $vr7");
+               asm volatile("vst $vr0, %0" : "=m" (p[0]));
+               asm volatile("vst $vr1, %0" : "=m" (p[16]));
+               asm volatile("vst $vr2, %0" : "=m" (p[32]));
+               asm volatile("vst $vr3, %0" : "=m" (p[48]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lsx = {
+       .data2 = raid6_2data_recov_lsx, /* two failed data blocks */
+       .datap = raid6_datap_recov_lsx, /* one failed data block plus P */
+       .valid = raid6_has_lsx,         /* availability check (returns cpu_has_lsx) */
+       .name = "lsx",
+       .priority = 1,                  /* raid6_recov_lasx uses 2; see the priority note near the top of this file */
+};
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static int raid6_has_lasx(void)
+{
+       return cpu_has_lasx;    /* installed as the .valid hook of raid6_recov_lasx; cpu_has_lasx is defined outside this file */
+}
+
+static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
+                                  int failb, void **ptrs)
+{
+       u8 *p, *q, *dp, *dq;
+       const u8 *pbmul;        /* P multiplier table for B data */
+       const u8 *qmul;         /* Q multiplier table (for both) */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Two-data-block recovery, LASX (256-bit vector) version: rebuilds
+        * the pages at ptrs[faila] and ptrs[failb] in place, reading P from
+        * ptrs[disks - 2] and Q from ptrs[disks - 1].
+        *
+        * Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        *
+        * The P and Q slots of ptrs are temporarily pointed at dp and dq,
+        * so gen_syndrome() presumably leaves its results in the two dead
+        * pages rather than in the real P and Q pages (confirm against the
+        * gen_syndrome contract).
+        */
+       dp = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 2] = dp;
+       dq = (u8 *)ptrs[failb];
+       ptrs[failb] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dp;
+       ptrs[failb] = dq;
+       ptrs[disks - 2] = p;
+       ptrs[disks - 1] = q;
+
+       /*
+        * Now, pick the proper data tables
+        *
+        * Each selected raid6_vgfmul[] row is consumed below as two 16-byte
+        * halves: offset 0 is looked up with the low nibble of each input
+        * byte, offset 16 with the high nibble.
+        * failb - faila is used directly as an array index, so callers
+        * presumably guarantee faila < failb -- confirm.
+        */
+       pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+       kernel_fpu_begin();
+
+       /*
+        * xr20, xr21: qmul
+        * xr22, xr23: pbmul
+        *
+        * Each 16-byte table half is first loaded with the 128-bit vld
+        * (using the vr name of the register) and then passed through
+        * xvreplve0.q on the same-numbered xr register. Presumably this
+        * replicates the low 128 bits into both halves of the 256-bit
+        * register so that the xvshuf.b lookups below see the same table
+        * in each half -- confirm against the LASX manual.
+        */
+       asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+       asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+       asm volatile("xvreplve0.q $xr20, $xr20");
+       asm volatile("xvreplve0.q $xr21, $xr21");
+       asm volatile("xvreplve0.q $xr22, $xr22");
+       asm volatile("xvreplve0.q $xr23, $xr23");
+
+       while (bytes) {
+               /*
+                * Each iteration handles 64 bytes (two 32-byte vectors at
+                * offsets 0 and 32) of p, q, dp and dq. bytes is a size_t
+                * that is decremented by 64 and the loop only stops when it
+                * is exactly 0, so bytes must be a multiple of 64.
+                *
+                * xr0, xr1: Q
+                */
+               asm volatile("xvld $xr0, %0" : : "m" (q[0]));
+               asm volatile("xvld $xr1, %0" : : "m" (q[32]));
+               /* xr0, xr1: Q + Qxy (Qxy is loaded from dq into xr4, xr5 first) */
+               asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
+               asm volatile("xvxor.v $xr0, $xr0, $xr4");
+               asm volatile("xvxor.v $xr1, $xr1, $xr5");
+               /* xr2, xr3: P */
+               asm volatile("xvld $xr2, %0" : : "m" (p[0]));
+               asm volatile("xvld $xr3, %0" : : "m" (p[32]));
+               /* xr2, xr3: P + Pxy (Pxy is loaded from dp, reusing xr4, xr5 as scratch) */
+               asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
+               asm volatile("xvsrli.b $xr4, $xr0, 4");
+               asm volatile("xvsrli.b $xr5, $xr1, 4");
+               /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
+               asm volatile("xvandi.b $xr0, $xr0, 0x0f");
+               asm volatile("xvandi.b $xr1, $xr1, 0x0f");
+               /* lookup from qmul[0] (xr20 is given as both table operands; the low nibbles are the indices) */
+               asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
+               asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
+               /* lookup from qmul[16] (same scheme, indexed by the high nibbles) */
+               asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
+               /* xr6, xr7: B(Q + Qxy), i.e. the XOR of the two nibble lookups */
+               asm volatile("xvxor.v $xr6, $xr4, $xr0");
+               asm volatile("xvxor.v $xr7, $xr5, $xr1");
+
+               /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
+               asm volatile("xvsrli.b $xr4, $xr2, 4");
+               asm volatile("xvsrli.b $xr5, $xr3, 4");
+               /*
+                * xr0, xr1: lower 4 bits of each byte of (P + Pxy)
+                * (xr2, xr3 are left intact: they are XORed into the second
+                * result further down)
+                */
+               asm volatile("xvandi.b $xr0, $xr2, 0x0f");
+               asm volatile("xvandi.b $xr1, $xr3, 0x0f");
+               /* lookup from pbmul[0] */
+               asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
+               asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
+               /* lookup from pbmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+               /* xr0, xr1: A(P + Pxy) */
+               asm volatile("xvxor.v $xr0, $xr0, $xr4");
+               asm volatile("xvxor.v $xr1, $xr1, $xr5");
+
+               /*
+                * xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx
+                * (stored below to dq, i.e. the page taken from ptrs[failb])
+                */
+               asm volatile("xvxor.v $xr0, $xr0, $xr6");
+               asm volatile("xvxor.v $xr1, $xr1, $xr7");
+
+               /*
+                * xr2, xr3: P + Pxy + Dx = Dy
+                * (stored below to dp, i.e. the page taken from ptrs[faila])
+                */
+               asm volatile("xvxor.v $xr2, $xr2, $xr0");
+               asm volatile("xvxor.v $xr3, $xr3, $xr1");
+
+               asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
+               asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
+               asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
+               asm volatile("xvst $xr3, %0" : "=m" (dp[32]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dp += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
+                                  void **ptrs)
+{
+       u8 *p, *q, *dq;
+       const u8 *qmul;         /* Q multiplier table */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Data + P recovery, LASX (256-bit vector) version: rebuilds the
+        * data page at ptrs[faila] and the P page at ptrs[disks - 2] in
+        * place, using Q from ptrs[disks - 1].
+        *
+        * Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        *
+        * Only the Q slot of ptrs is redirected (to dq); the P slot still
+        * points at p, so gen_syndrome() presumably rewrites p with a parity
+        * that lacks the missing block -- the "P + Dx" loaded in the loop
+        * below (confirm against the gen_syndrome contract).
+        */
+       dq = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dq;
+       ptrs[disks - 1] = q;
+
+       /*
+        * Now, pick the proper data tables
+        *
+        * The selected raid6_vgfmul[] row is consumed below as two 16-byte
+        * halves: offset 0 is looked up with the low nibble of each input
+        * byte, offset 16 with the high nibble.
+        */
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       kernel_fpu_begin();
+
+       /*
+        * xr22, xr23: qmul
+        *
+        * Each 16-byte table half is loaded with the 128-bit vld and then
+        * passed through xvreplve0.q; presumably this replicates the low
+        * 128 bits into both halves of the 256-bit register so that the
+        * xvshuf.b lookups below see the same table in each half -- confirm
+        * against the LASX manual.
+        */
+       asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+       asm volatile("xvreplve0.q $xr22, $xr22");
+       asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+       asm volatile("xvreplve0.q $xr23, $xr23");
+
+       while (bytes) {
+               /*
+                * Each iteration handles 64 bytes (two 32-byte vectors at
+                * offsets 0 and 32) of p, q and dq. bytes is a size_t that
+                * is decremented by 64 and the loop only stops when it is
+                * exactly 0, so bytes must be a multiple of 64.
+                *
+                * xr0, xr1: P + Dx
+                */
+               asm volatile("xvld $xr0, %0" : : "m" (p[0]));
+               asm volatile("xvld $xr1, %0" : : "m" (p[32]));
+               /* xr2, xr3: Qx */
+               asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
+               asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
+               /* xr2, xr3: Q + Qx (Q is loaded from q into xr4, xr5 first) */
+               asm volatile("xvld $xr4, %0" : : "m" (q[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (q[32]));
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
+               asm volatile("xvsrli.b $xr4, $xr2, 4");
+               asm volatile("xvsrli.b $xr5, $xr3, 4");
+               /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
+               asm volatile("xvandi.b $xr2, $xr2, 0x0f");
+               asm volatile("xvandi.b $xr3, $xr3, 0x0f");
+               /* lookup from qmul[0] (xr22 is given as both table operands; the low nibbles are the indices) */
+               asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
+               asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
+               /* lookup from qmul[16] (same scheme, indexed by the high nibbles) */
+               asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+               /* xr2, xr3: qmul(Q + Qx) = Dx (stored below to dq, the page taken from ptrs[faila]) */
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr0, xr1: P + Dx + Dx = P (stored below back over the P page) */
+               asm volatile("xvxor.v $xr0, $xr0, $xr2");
+               asm volatile("xvxor.v $xr1, $xr1, $xr3");
+
+               asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
+               asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
+               asm volatile("xvst $xr0, %0" : "=m" (p[0]));
+               asm volatile("xvst $xr1, %0" : "=m" (p[32]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lasx = {
+       .data2 = raid6_2data_recov_lasx,        /* two failed data blocks */
+       .datap = raid6_datap_recov_lasx,        /* one failed data block plus P */
+       .valid = raid6_has_lasx,                /* availability check (returns cpu_has_lasx) */
+       .name = "lasx",
+       .priority = 2,                          /* raid6_recov_lsx uses 1; see the priority note near the top of this file */
+};
+#endif /* CONFIG_CPU_HAS_LASX */
index 1f693ea..2abe007 100644 (file)
@@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc)
                          gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 endif
 
+ifeq ($(ARCH),loongarch64)
+        CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1
+        CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' |         \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1)
+        CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' |        \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1)
+endif
+
 ifeq ($(IS_X86),yes)
         OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
         CFLAGS += -DCONFIG_X86
@@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes)
         CFLAGS += -DCONFIG_ALTIVEC
         OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
                 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
+else ifeq ($(ARCH),loongarch64)
+        OBJS += loongarch_simd.o recov_loongarch_simd.o
 endif
 
 .c.o:
index b620cf7..a2707af 100644 (file)
@@ -606,7 +606,7 @@ static void __init numbers_slice(void)
 #define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn)     \
 do {                                                                           \
        const T expect[2] = { expect0, expect1 };                               \
-       T result[2] = {~expect[0], ~expect[1]};                                 \
+       T result[2] = { (T)~expect[0], (T)~expect[1] };                         \
                                                                                \
        _test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]);      \
 } while (0)
index 2071a37..39f07bf 100644 (file)
@@ -206,7 +206,7 @@ static void *xas_descend(struct xa_state *xas, struct xa_node *node)
        void *entry = xa_entry(xas->xa, node, offset);
 
        xas->xa_node = node;
-       if (xa_is_sibling(entry)) {
+       while (xa_is_sibling(entry)) {
                offset = xa_to_sibling(entry);
                entry = xa_entry(xas->xa, node, offset);
                if (node->shift && xa_is_node(entry))
@@ -1802,6 +1802,9 @@ EXPORT_SYMBOL(xa_get_order);
  * stores the index into the @id pointer, then stores the entry at
  * that index.  A concurrent lookup will not see an uninitialised @id.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Any context.  Expects xa_lock to be held on entry.  May
  * release and reacquire xa_lock if @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory could not be allocated or
@@ -1850,6 +1853,9 @@ EXPORT_SYMBOL(__xa_alloc);
  * The search for an empty entry will start at @next and will wrap
  * around if necessary.
  *
+ * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
+ * in xa_init_flags().
+ *
  * Context: Any context.  Expects xa_lock to be held on entry.  May
  * release and reacquire xa_lock if @gfp flags permit.
  * Return: 0 if the allocation succeeded without wrapping.  1 if the
index bf6219d..582f531 100644 (file)
  *    bdi.wb->list_lock                (zap_pte_range->set_page_dirty)
  *    ->inode->i_lock          (zap_pte_range->set_page_dirty)
  *    ->private_lock           (zap_pte_range->block_dirty_folio)
- *
- * ->i_mmap_rwsem
- *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
 static void page_cache_delete(struct address_space *mapping,
index dcfec27..89895f3 100644 (file)
@@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
        return 0;
 }
 
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
 static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                unsigned long end)
 {
@@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                if (!p)
                                        return -ENOMEM;
                        } else {
-                               pud_populate(&init_mm, pud,
-                                       early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+                               p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                               pmd_init(p);
+                               pud_populate(&init_mm, pud, p);
                        }
                }
                zero_pmd_populate(pud, addr, next);
@@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
        return 0;
 }
 
+void __weak __meminit pud_init(void *addr)
+{
+}
+
 static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
                                unsigned long end)
 {
@@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
                                if (!p)
                                        return -ENOMEM;
                        } else {
-                               p4d_populate(&init_mm, p4d,
-                                       early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+                               p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                               pud_init(p);
+                               p4d_populate(&init_mm, p4d, p);
                        }
                }
                zero_pud_populate(p4d, addr, next);
index 2e973b3..f70e3d7 100644 (file)
@@ -291,16 +291,22 @@ struct kasan_stack_ring {
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
+#ifndef __HAVE_ARCH_SHADOW_MAP
 static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
 {
        return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
                << KASAN_SHADOW_SCALE_SHIFT);
 }
+#endif
 
 static __always_inline bool addr_has_metadata(const void *addr)
 {
+#ifdef __HAVE_ARCH_SHADOW_MAP
+       return (kasan_mem_to_shadow((void *)addr) != NULL);
+#else
        return (kasan_reset_tag(addr) >=
                kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+#endif
 }
 
 /**
index 96fd041..3872528 100644 (file)
@@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h)
  */
 static unsigned long kfence_init_pool(void)
 {
-       unsigned long addr = (unsigned long)__kfence_pool;
+       unsigned long addr;
        struct page *pages;
        int i;
 
        if (!arch_kfence_init_pool())
-               return addr;
+               return (unsigned long)__kfence_pool;
 
+       addr = (unsigned long)__kfence_pool;
        pages = virt_to_page(__kfence_pool);
 
        /*
index 2918150..54c2c90 100644 (file)
@@ -1584,6 +1584,9 @@ static void kmemleak_scan(void)
                for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                        struct page *page = pfn_to_online_page(pfn);
 
+                       if (!(pfn & 63))
+                               cond_resched();
+
                        if (!page)
                                continue;
 
@@ -1594,8 +1597,6 @@ static void kmemleak_scan(void)
                        if (page_count(page) == 0)
                                continue;
                        scan_block(page, page + 1, NULL);
-                       if (!(pfn & 63))
-                               cond_resched();
                }
        }
        put_online_mems();
index 8d6aee0..981af9c 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2925,7 +2925,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill,
                struct anon_vma *av = rmap_item->anon_vma;
 
                anon_vma_lock_read(av);
-               read_lock(&tasklist_lock);
+               rcu_read_lock();
                for_each_process(tsk) {
                        struct anon_vma_chain *vmac;
                        unsigned long addr;
@@ -2944,7 +2944,7 @@ void collect_procs_ksm(struct page *page, struct list_head *to_kill,
                                }
                        }
                }
-               read_unlock(&tasklist_lock);
+               rcu_read_unlock();
                anon_vma_unlock_read(av);
        }
 }
index b29b850..a4d3282 100644 (file)
@@ -5326,7 +5326,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
        INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
        memcg->deferred_split_queue.split_queue_len = 0;
 #endif
-       idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
        lru_gen_init_memcg(memcg);
        return memcg;
 fail:
@@ -5398,14 +5397,27 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
        if (alloc_shrinker_info(memcg))
                goto offline_kmem;
 
-       /* Online state pins memcg ID, memcg ID pins CSS */
-       refcount_set(&memcg->id.ref, 1);
-       css_get(css);
-
        if (unlikely(mem_cgroup_is_root(memcg)))
                queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
                                   FLUSH_TIME);
        lru_gen_online_memcg(memcg);
+
+       /* Online state pins memcg ID, memcg ID pins CSS */
+       refcount_set(&memcg->id.ref, 1);
+       css_get(css);
+
+       /*
+        * Ensure mem_cgroup_from_id() works once we're fully online.
+        *
+        * We could do this earlier and require callers to filter with
+        * css_tryget_online(). But right now there are no users that
+        * need earlier access, and the workingset code relies on the
+        * cgroup tree linkage (mem_cgroup_get_nr_swap_pages()). So
+        * publish it here at the end of onlining. This matches the
+        * regular ID destruction during offlining.
+        */
+       idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+
        return 0;
 offline_kmem:
        memcg_offline_kmem(memcg);
index 1cad190..2dba2cb 100644 (file)
@@ -316,7 +316,7 @@ SYSCALL_DEFINE2(memfd_create,
                return -EINVAL;
 
        if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
-               pr_info_ratelimited(
+               pr_warn_once(
                        "%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
                        current->comm, task_pid_nr(current));
        }
index 881c35e..4d6e43c 100644 (file)
@@ -547,8 +547,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
  * on behalf of the thread group. Return task_struct of the (first found)
  * dedicated thread if found, and return NULL otherwise.
  *
- * We already hold read_lock(&tasklist_lock) in the caller, so we don't
- * have to call rcu_read_lock/unlock() in this function.
+ * We already hold rcu lock in the caller, so we don't have to call
+ * rcu_read_lock/unlock() in this function.
  */
 static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
 {
@@ -609,7 +609,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
                return;
 
        pgoff = page_to_pgoff(page);
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
        for_each_process(tsk) {
                struct anon_vma_chain *vmac;
                struct task_struct *t = task_early_kill(tsk, force_early);
@@ -626,7 +626,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
                        add_to_kill_anon_file(t, page, vma, to_kill);
                }
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        anon_vma_unlock_read(av);
 }
 
@@ -642,7 +642,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
        pgoff_t pgoff;
 
        i_mmap_lock_read(mapping);
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
        pgoff = page_to_pgoff(page);
        for_each_process(tsk) {
                struct task_struct *t = task_early_kill(tsk, force_early);
@@ -662,7 +662,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
                                add_to_kill_anon_file(t, page, vma, to_kill);
                }
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        i_mmap_unlock_read(mapping);
 }
 
@@ -685,7 +685,7 @@ static void collect_procs_fsdax(struct page *page,
        struct task_struct *tsk;
 
        i_mmap_lock_read(mapping);
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
        for_each_process(tsk) {
                struct task_struct *t = task_early_kill(tsk, true);
 
@@ -696,7 +696,7 @@ static void collect_procs_fsdax(struct page *page,
                                add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
                }
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        i_mmap_unlock_read(mapping);
 }
 #endif /* CONFIG_FS_DAX */
@@ -717,7 +717,7 @@ static void collect_procs(struct page *page, struct list_head *tokill,
                collect_procs_file(page, tokill, force_early);
 }
 
-struct hwp_walk {
+struct hwpoison_walk {
        struct to_kill tk;
        unsigned long pfn;
        int flags;
@@ -752,7 +752,7 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
-                                     struct hwp_walk *hwp)
+                                     struct hwpoison_walk *hwp)
 {
        pmd_t pmd = *pmdp;
        unsigned long pfn;
@@ -770,7 +770,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
 }
 #else
 static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
-                                     struct hwp_walk *hwp)
+                                     struct hwpoison_walk *hwp)
 {
        return 0;
 }
@@ -779,7 +779,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
 static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
                              unsigned long end, struct mm_walk *walk)
 {
-       struct hwp_walk *hwp = walk->private;
+       struct hwpoison_walk *hwp = walk->private;
        int ret = 0;
        pte_t *ptep, *mapped_pte;
        spinlock_t *ptl;
@@ -813,7 +813,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
                            unsigned long addr, unsigned long end,
                            struct mm_walk *walk)
 {
-       struct hwp_walk *hwp = walk->private;
+       struct hwpoison_walk *hwp = walk->private;
        pte_t pte = huge_ptep_get(ptep);
        struct hstate *h = hstate_vma(walk->vma);
 
@@ -824,7 +824,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 #define hwpoison_hugetlb_range NULL
 #endif
 
-static const struct mm_walk_ops hwp_walk_ops = {
+static const struct mm_walk_ops hwpoison_walk_ops = {
        .pmd_entry = hwpoison_pte_range,
        .hugetlb_entry = hwpoison_hugetlb_range,
        .walk_lock = PGWALK_RDLOCK,
@@ -847,7 +847,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
                                  int flags)
 {
        int ret;
-       struct hwp_walk priv = {
+       struct hwpoison_walk priv = {
                .pfn = pfn,
        };
        priv.tk.tsk = p;
@@ -856,7 +856,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
                return -EFAULT;
 
        mmap_read_lock(p->mm);
-       ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+       ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops,
                              (void *)&priv);
        if (ret == 1 && priv.tk.addr)
                kill_proc(&priv.tk, pfn, flags);
@@ -1562,7 +1562,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
         * Here we are interested only in user-mapped pages, so skip any
         * other types of pages.
         */
-       if (PageReserved(p) || PageSlab(p) || PageTable(p))
+       if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
                return true;
        if (!(PageLRU(hpage) || PageHuge(p)))
                return true;
@@ -2533,7 +2533,8 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
-       if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+       if (folio_test_slab(folio) || PageTable(&folio->page) ||
+           folio_test_reserved(folio) || PageOffline(&folio->page))
                goto unlock_mutex;
 
        /*
index 4524598..0c5be12 100644 (file)
@@ -2641,12 +2641,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
        do {
                page = NULL;
                spin_lock_irqsave(&zone->lock, flags);
-               /*
-                * order-0 request can reach here when the pcplist is skipped
-                * due to non-CMA allocation context. HIGHATOMIC area is
-                * reserved for high-order atomic allocation, so order-0
-                * request should skip it.
-                */
                if (alloc_flags & ALLOC_HIGHATOMIC)
                        page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
                if (!page) {
@@ -2780,17 +2774,10 @@ struct page *rmqueue(struct zone *preferred_zone,
        WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
 
        if (likely(pcp_allowed_order(order))) {
-               /*
-                * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
-                * we need to skip it when CMA area isn't allowed.
-                */
-               if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
-                               migratetype != MIGRATE_MOVABLE) {
-                       page = rmqueue_pcplist(preferred_zone, zone, order,
-                                       migratetype, alloc_flags);
-                       if (likely(page))
-                               goto out;
-               }
+               page = rmqueue_pcplist(preferred_zone, zone, order,
+                                      migratetype, alloc_flags);
+               if (likely(page))
+                       goto out;
        }
 
        page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
index f08b655..8cbbfd3 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -1068,7 +1068,9 @@ void mem_dump_obj(void *object)
        if (vmalloc_dump_obj(object))
                return;
 
-       if (virt_addr_valid(object))
+       if (is_vmalloc_addr(object))
+               type = "vmalloc memory";
+       else if (virt_addr_valid(object))
                type = "non-slab/vmalloc memory";
        else if (object == NULL)
                type = "NULL pointer";
index 228a4a5..ef8599d 100644 (file)
@@ -4278,14 +4278,32 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 #ifdef CONFIG_PRINTK
 bool vmalloc_dump_obj(void *object)
 {
-       struct vm_struct *vm;
        void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+       const void *caller;
+       struct vm_struct *vm;
+       struct vmap_area *va;
+       unsigned long addr;
+       unsigned int nr_pages;
 
-       vm = find_vm_area(objp);
-       if (!vm)
+       if (!spin_trylock(&vmap_area_lock))
+               return false;
+       va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
+       if (!va) {
+               spin_unlock(&vmap_area_lock);
                return false;
+       }
+
+       vm = va->vm;
+       if (!vm) {
+               spin_unlock(&vmap_area_lock);
+               return false;
+       }
+       addr = (unsigned long)vm->addr;
+       caller = vm->caller;
+       nr_pages = vm->nr_pages;
+       spin_unlock(&vmap_area_lock);
        pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
-               vm->nr_pages, (unsigned long)vm->addr, vm->caller);
+               nr_pages, addr, caller);
        return true;
 }
 #endif
index 57a7a64..0841f8d 100644 (file)
@@ -543,6 +543,7 @@ struct bpf_fentry_test_t {
 
 int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
 {
+       asm volatile ("");
        return (long)arg;
 }
 
index feaec4a..b28c976 100644 (file)
@@ -974,6 +974,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
        struct sock_exterr_skb *serr;
        struct sk_buff *skb;
        char *state = "UNK";
+       u32 tsflags;
        int err;
 
        jsk = j1939_sk(sk);
@@ -981,13 +982,14 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
        if (!(jsk->state & J1939_SOCK_ERRQUEUE))
                return;
 
+       tsflags = READ_ONCE(sk->sk_tsflags);
        switch (type) {
        case J1939_ERRQUEUE_TX_ACK:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+               if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
                        return;
                break;
        case J1939_ERRQUEUE_TX_SCHED:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+               if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
                        return;
                break;
        case J1939_ERRQUEUE_TX_ABORT:
@@ -997,7 +999,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
        case J1939_ERRQUEUE_RX_DPO:
                fallthrough;
        case J1939_ERRQUEUE_RX_ABORT:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+               if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
                        return;
                break;
        default:
@@ -1054,7 +1056,7 @@ static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
        }
 
        serr->opt_stats = true;
-       if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+       if (tsflags & SOF_TIMESTAMPING_OPT_ID)
                serr->ee.ee_data = session->tskey;
 
        netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
index 5eb4898..10a41cd 100644 (file)
@@ -969,6 +969,62 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
        return true;
 }
 
+static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor,
+                                          size_t length)
+{
+       struct ceph_msg_data *data = cursor->data;
+
+       cursor->iov_iter = data->iter;
+       cursor->lastlen = 0;
+       iov_iter_truncate(&cursor->iov_iter, length);
+       cursor->resid = iov_iter_count(&cursor->iov_iter);
+}
+
+static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor,
+                                           size_t *page_offset, size_t *length)
+{
+       struct page *page;
+       ssize_t len;
+
+       if (cursor->lastlen)
+               iov_iter_revert(&cursor->iov_iter, cursor->lastlen);
+
+       len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE,
+                                 1, page_offset);
+       BUG_ON(len < 0);
+
+       cursor->lastlen = len;
+
+       /*
+        * FIXME: The assumption is that the pages represented by the iov_iter
+        *        are pinned, with the references held by the upper-level
+        *        callers, or by virtue of being under writeback. Eventually,
+        *        we'll get an iov_iter_get_pages2 variant that doesn't take
+        *        page refs. Until then, just put the page ref.
+        */
+       VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page);
+       put_page(page);
+
+       *length = min_t(size_t, len, cursor->resid);
+       return page;
+}
+
+static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor,
+                                      size_t bytes)
+{
+       BUG_ON(bytes > cursor->resid);
+       cursor->resid -= bytes;
+
+       if (bytes < cursor->lastlen) {
+               cursor->lastlen -= bytes;
+       } else {
+               iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen);
+               cursor->lastlen = 0;
+       }
+
+       return cursor->resid;
+}
+
 /*
  * Message data is handled (sent or received) in pieces, where each
  * piece resides on a single page.  The network layer might not
@@ -996,6 +1052,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
        case CEPH_MSG_DATA_BVECS:
                ceph_msg_data_bvecs_cursor_init(cursor, length);
                break;
+       case CEPH_MSG_DATA_ITER:
+               ceph_msg_data_iter_cursor_init(cursor, length);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                /* BUG(); */
@@ -1013,6 +1072,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
 
        cursor->total_resid = length;
        cursor->data = msg->data;
+       cursor->sr_resid = 0;
 
        __ceph_msg_data_cursor_init(cursor);
 }
@@ -1042,6 +1102,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
        case CEPH_MSG_DATA_BVECS:
                page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
                break;
+       case CEPH_MSG_DATA_ITER:
+               page = ceph_msg_data_iter_next(cursor, page_offset, length);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                page = NULL;
@@ -1080,6 +1143,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
        case CEPH_MSG_DATA_BVECS:
                new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
                break;
+       case CEPH_MSG_DATA_ITER:
+               new_piece = ceph_msg_data_iter_advance(cursor, bytes);
+               break;
        case CEPH_MSG_DATA_NONE:
        default:
                BUG();
@@ -1879,6 +1945,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
 
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+                           struct iov_iter *iter)
+{
+       struct ceph_msg_data *data;
+
+       data = ceph_msg_data_add(msg);
+       data->type = CEPH_MSG_DATA_ITER;
+       data->iter = *iter;
+
+       msg->data_length += iov_iter_count(&data->iter);
+}
+
 /*
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
index 3d57bb4..f9a50d7 100644 (file)
@@ -159,9 +159,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
 
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
-       /* Initialize data cursor */
-
-       ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+       /* Initialize data cursor if it's not a sparse read */
+       if (!msg->sparse_read)
+               ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
 }
 
 /*
@@ -960,9 +960,9 @@ static void process_ack(struct ceph_connection *con)
        prepare_read_tag(con);
 }
 
-static int read_partial_message_section(struct ceph_connection *con,
-                                       struct kvec *section,
-                                       unsigned int sec_len, u32 *crc)
+static int read_partial_message_chunk(struct ceph_connection *con,
+                                     struct kvec *section,
+                                     unsigned int sec_len, u32 *crc)
 {
        int ret, left;
 
@@ -978,11 +978,91 @@ static int read_partial_message_section(struct ceph_connection *con,
                section->iov_len += ret;
        }
        if (section->iov_len == sec_len)
-               *crc = crc32c(0, section->iov_base, section->iov_len);
+               *crc = crc32c(*crc, section->iov_base, section->iov_len);
 
        return 1;
 }
 
+static inline int read_partial_message_section(struct ceph_connection *con,
+                                              struct kvec *section,
+                                              unsigned int sec_len, u32 *crc)
+{
+       *crc = 0;
+       return read_partial_message_chunk(con, section, sec_len, crc);
+}
+
+static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+{
+       struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+       bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
+
+       if (do_bounce && unlikely(!con->bounce_page)) {
+               con->bounce_page = alloc_page(GFP_NOIO);
+               if (!con->bounce_page) {
+                       pr_err("failed to allocate bounce page\n");
+                       return -ENOMEM;
+               }
+       }
+
+       while (cursor->sr_resid > 0) {
+               struct page *page, *rpage;
+               size_t off, len;
+               int ret;
+
+               page = ceph_msg_data_next(cursor, &off, &len);
+               rpage = do_bounce ? con->bounce_page : page;
+
+               /* clamp to what remains in extent */
+               len = min_t(int, len, cursor->sr_resid);
+               ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len);
+               if (ret <= 0)
+                       return ret;
+               *crc = ceph_crc32c_page(*crc, rpage, off, ret);
+               ceph_msg_data_advance(cursor, (size_t)ret);
+               cursor->sr_resid -= ret;
+               if (do_bounce)
+                       memcpy_page(page, off, rpage, off, ret);
+       }
+       return 1;
+}
+
+static int read_sparse_msg_data(struct ceph_connection *con)
+{
+       struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+       bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
+       u32 crc = 0;
+       int ret = 1;
+
+       if (do_datacrc)
+               crc = con->in_data_crc;
+
+       do {
+               if (con->v1.in_sr_kvec.iov_base)
+                       ret = read_partial_message_chunk(con,
+                                                        &con->v1.in_sr_kvec,
+                                                        con->v1.in_sr_len,
+                                                        &crc);
+               else if (cursor->sr_resid > 0)
+                       ret = read_sparse_msg_extent(con, &crc);
+
+               if (ret <= 0) {
+                       if (do_datacrc)
+                               con->in_data_crc = crc;
+                       return ret;
+               }
+
+               memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
+               ret = con->ops->sparse_read(con, cursor,
+                               (char **)&con->v1.in_sr_kvec.iov_base);
+               con->v1.in_sr_len = ret;
+       } while (ret > 0);
+
+       if (do_datacrc)
+               con->in_data_crc = crc;
+
+       return ret < 0 ? ret : 1;  /* must return > 0 to indicate success */
+}
+
 static int read_partial_msg_data(struct ceph_connection *con)
 {
        struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
@@ -1173,7 +1253,9 @@ static int read_partial_message(struct ceph_connection *con)
                if (!m->num_data_items)
                        return -EIO;
 
-               if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+               if (m->sparse_read)
+                       ret = read_sparse_msg_data(con);
+               else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
                        ret = read_partial_msg_data_bounce(con);
                else
                        ret = read_partial_msg_data(con);
index 1df1d29..d09a39f 100644 (file)
@@ -8,9 +8,9 @@
 #include <linux/ceph/ceph_debug.h>
 
 #include <crypto/aead.h>
-#include <crypto/algapi.h>  /* for crypto_memneq() */
 #include <crypto/hash.h>
 #include <crypto/sha2.h>
+#include <crypto/utils.h>
 #include <linux/bvec.h>
 #include <linux/crc32c.h>
 #include <linux/net.h>
 #define FRAME_LATE_STATUS_COMPLETE     0xe
 #define FRAME_LATE_STATUS_ABORTED_MASK 0xf
 
-#define IN_S_HANDLE_PREAMBLE           1
-#define IN_S_HANDLE_CONTROL            2
-#define IN_S_HANDLE_CONTROL_REMAINDER  3
-#define IN_S_PREPARE_READ_DATA         4
-#define IN_S_PREPARE_READ_DATA_CONT    5
-#define IN_S_PREPARE_READ_ENC_PAGE     6
-#define IN_S_HANDLE_EPILOGUE           7
-#define IN_S_FINISH_SKIP               8
+#define IN_S_HANDLE_PREAMBLE                   1
+#define IN_S_HANDLE_CONTROL                    2
+#define IN_S_HANDLE_CONTROL_REMAINDER          3
+#define IN_S_PREPARE_READ_DATA                 4
+#define IN_S_PREPARE_READ_DATA_CONT            5
+#define IN_S_PREPARE_READ_ENC_PAGE             6
+#define IN_S_PREPARE_SPARSE_DATA               7
+#define IN_S_PREPARE_SPARSE_DATA_CONT          8
+#define IN_S_HANDLE_EPILOGUE                   9
+#define IN_S_FINISH_SKIP                       10
 
 #define OUT_S_QUEUE_DATA               1
 #define OUT_S_QUEUE_DATA_CONT          2
@@ -967,12 +969,48 @@ static void init_sgs_cursor(struct scatterlist **sg,
        }
 }
 
+/**
+ * init_sgs_pages: set up scatterlist on an array of page pointers
+ * @sg:                scatterlist to populate
+ * @pages:     pointer to page array
+ * @dpos:      position in the array to start (bytes)
+ * @dlen:      len to add to sg (bytes)
+ * @pad:       pointer to pad destination (if any)
+ *
+ * Populate the scatterlist from the page array, starting at an arbitrary
+ * byte in the array and running for a specified length.
+ */
+static void init_sgs_pages(struct scatterlist **sg, struct page **pages,
+                          int dpos, int dlen, u8 *pad)
+{
+       int idx = dpos >> PAGE_SHIFT;
+       int off = offset_in_page(dpos);
+       int resid = dlen;
+
+       do {
+               int len = min(resid, (int)PAGE_SIZE - off);
+
+               sg_set_page(*sg, pages[idx], len, off);
+               *sg = sg_next(*sg);
+               off = 0;
+               ++idx;
+               resid -= len;
+       } while (resid);
+
+       if (need_padding(dlen)) {
+               sg_set_buf(*sg, pad, padding_len(dlen));
+               *sg = sg_next(*sg);
+       }
+}
+
 static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
                             u8 *front_pad, u8 *middle_pad, u8 *data_pad,
-                            void *epilogue, bool add_tag)
+                            void *epilogue, struct page **pages, int dpos,
+                            bool add_tag)
 {
        struct ceph_msg_data_cursor cursor;
        struct scatterlist *cur_sg;
+       int dlen = data_len(msg);
        int sg_cnt;
        int ret;
 
@@ -986,9 +1024,15 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
        if (middle_len(msg))
                sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
                                      middle_len(msg));
-       if (data_len(msg)) {
-               ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
-               sg_cnt += calc_sg_cnt_cursor(&cursor);
+       if (dlen) {
+               if (pages) {
+                       sg_cnt += calc_pages_for(dpos, dlen);
+                       if (need_padding(dlen))
+                               sg_cnt++;
+               } else {
+                       ceph_msg_data_cursor_init(&cursor, msg, dlen);
+                       sg_cnt += calc_sg_cnt_cursor(&cursor);
+               }
        }
 
        ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
@@ -1002,9 +1046,13 @@ static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
        if (middle_len(msg))
                init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
                         middle_pad);
-       if (data_len(msg)) {
-               ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
-               init_sgs_cursor(&cur_sg, &cursor, data_pad);
+       if (dlen) {
+               if (pages) {
+                       init_sgs_pages(&cur_sg, pages, dpos, dlen, data_pad);
+               } else {
+                       ceph_msg_data_cursor_init(&cursor, msg, dlen);
+                       init_sgs_cursor(&cur_sg, &cursor, data_pad);
+               }
        }
 
        WARN_ON(!sg_is_last(cur_sg));
@@ -1039,10 +1087,53 @@ static int decrypt_control_remainder(struct ceph_connection *con)
                         padded_len(rem_len) + CEPH_GCM_TAG_LEN);
 }
 
+/* Process sparse read data that lives in a buffer */
+static int process_v2_sparse_read(struct ceph_connection *con,
+                                 struct page **pages, int spos)
+{
+       struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+       int ret;
+
+       for (;;) {
+               char *buf = NULL;
+
+               ret = con->ops->sparse_read(con, cursor, &buf);
+               if (ret <= 0)
+                       return ret;
+
+               dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf);
+
+               do {
+                       int idx = spos >> PAGE_SHIFT;
+                       int soff = offset_in_page(spos);
+                       struct page *spage = con->v2.in_enc_pages[idx];
+                       int len = min_t(int, ret, PAGE_SIZE - soff);
+
+                       if (buf) {
+                               memcpy_from_page(buf, spage, soff, len);
+                               buf += len;
+                       } else {
+                               struct bio_vec bv;
+
+                               get_bvec_at(cursor, &bv);
+                               len = min_t(int, len, bv.bv_len);
+                               memcpy_page(bv.bv_page, bv.bv_offset,
+                                           spage, soff, len);
+                               ceph_msg_data_advance(cursor, len);
+                       }
+                       spos += len;
+                       ret -= len;
+               } while (ret);
+       }
+}
+
 static int decrypt_tail(struct ceph_connection *con)
 {
        struct sg_table enc_sgt = {};
        struct sg_table sgt = {};
+       struct page **pages = NULL;
+       bool sparse = con->in_msg->sparse_read;
+       int dpos = 0;
        int tail_len;
        int ret;
 
@@ -1053,9 +1144,14 @@ static int decrypt_tail(struct ceph_connection *con)
        if (ret)
                goto out;
 
+       if (sparse) {
+               dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg)));
+               pages = con->v2.in_enc_pages;
+       }
+
        ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
-                       MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
-                       con->v2.in_buf, true);
+                               MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
+                               con->v2.in_buf, pages, dpos, true);
        if (ret)
                goto out;
 
@@ -1065,6 +1161,12 @@ static int decrypt_tail(struct ceph_connection *con)
        if (ret)
                goto out;
 
+       if (sparse && data_len(con->in_msg)) {
+               ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos);
+               if (ret)
+                       goto out;
+       }
+
        WARN_ON(!con->v2.in_enc_page_cnt);
        ceph_release_page_vector(con->v2.in_enc_pages,
                                 con->v2.in_enc_page_cnt);
@@ -1588,7 +1690,7 @@ static int prepare_message_secure(struct ceph_connection *con)
 
        encode_epilogue_secure(con, false);
        ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
-                               &con->v2.out_epil, false);
+                               &con->v2.out_epil, NULL, 0, false);
        if (ret)
                goto out;
 
@@ -1825,6 +1927,123 @@ static void prepare_read_data_cont(struct ceph_connection *con)
        con->v2.in_state = IN_S_HANDLE_EPILOGUE;
 }
 
+/*
+ * Continue a sparse read while in IN_S_PREPARE_SPARSE_DATA_CONT.
+ *
+ * First account for whatever was just received:
+ *  - bvec iterator: fold the received bytes into in_data_crc (when RXBOUNCE
+ *    is set they sit in the bounce page and are copied to the page at the
+ *    cursor), advance the cursor, and if the current extent still has bytes
+ *    outstanding (sr_resid) queue the next bvec and return 0;
+ *  - kvec iterator: fold the single queued kvec into in_data_crc;
+ *  - anything else: -EIO.
+ *
+ * Then ask con->ops->sparse_read() what to receive next:
+ *  - < 0: error, returned unchanged;
+ *  - 0: nothing more; queue the plain epilogue and move to
+ *    IN_S_HANDLE_EPILOGUE;
+ *  - > 0 with *buf set: receive that many bytes into buf;
+ *  - > 0 with *buf NULL: receive extent data through the data cursor,
+ *    limited to one bvec (capped at sr_resid) per call.
+ *
+ * Returns 0 or a positive value on success and a negative errno on failure.
+ */
+static int prepare_sparse_read_cont(struct ceph_connection *con)
+{
+       int ret;
+       struct bio_vec bv;
+       char *buf = NULL;
+       struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+
+       WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT);
+
+       if (iov_iter_is_bvec(&con->v2.in_iter)) {
+               if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+                       /* data landed in the bounce page: crc it, then copy out */
+                       con->in_data_crc = crc32c(con->in_data_crc,
+                                                 page_address(con->bounce_page),
+                                                 con->v2.in_bvec.bv_len);
+                       get_bvec_at(cursor, &bv);
+                       memcpy_to_page(bv.bv_page, bv.bv_offset,
+                                      page_address(con->bounce_page),
+                                      con->v2.in_bvec.bv_len);
+               } else {
+                       con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
+                                                           con->v2.in_bvec.bv_page,
+                                                           con->v2.in_bvec.bv_offset,
+                                                           con->v2.in_bvec.bv_len);
+               }
+
+               ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len);
+               cursor->sr_resid -= con->v2.in_bvec.bv_len;
+               dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__,
+                    con->v2.in_bvec.bv_len, cursor->sr_resid);
+               WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid);
+               if (cursor->sr_resid) {
+                       /* current extent not finished: queue its next piece */
+                       get_bvec_at(cursor, &bv);
+                       if (bv.bv_len > cursor->sr_resid)
+                               bv.bv_len = cursor->sr_resid;
+                       if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+                               bv.bv_page = con->bounce_page;
+                               bv.bv_offset = 0;
+                       }
+                       set_in_bvec(con, &bv);
+                       con->v2.data_len_remain -= bv.bv_len;
+                       return 0;
+               }
+       } else if (iov_iter_is_kvec(&con->v2.in_iter)) {
+               /* On first call, we have no kvec so don't compute crc */
+               if (con->v2.in_kvec_cnt) {
+                       WARN_ON_ONCE(con->v2.in_kvec_cnt > 1);
+                       con->in_data_crc = crc32c(con->in_data_crc,
+                                                 con->v2.in_kvecs[0].iov_base,
+                                                 con->v2.in_kvecs[0].iov_len);
+               }
+       } else {
+               return -EIO;
+       }
+
+       /* get next extent */
+       ret = con->ops->sparse_read(con, cursor, &buf);
+       if (ret <= 0) {
+               if (ret < 0)
+                       return ret;
+
+               /* ret == 0: no more sparse data, the epilogue comes next */
+               reset_in_kvecs(con);
+               add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+               con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+               return 0;
+       }
+
+       if (buf) {
+               /* receive into buffer */
+               reset_in_kvecs(con);
+               add_in_kvec(con, buf, ret);
+               con->v2.data_len_remain -= ret;
+               return 0;
+       }
+
+       /* no buffer supplied: ret bytes of extent data go through the cursor */
+       if (ret > cursor->total_resid) {
+               pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
+                       __func__, ret, cursor->total_resid, cursor->resid);
+               return -EIO;
+       }
+       get_bvec_at(cursor, &bv);
+       if (bv.bv_len > cursor->sr_resid)
+               bv.bv_len = cursor->sr_resid;
+       if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+               if (unlikely(!con->bounce_page)) {
+                       con->bounce_page = alloc_page(GFP_NOIO);
+                       if (!con->bounce_page) {
+                               pr_err("failed to allocate bounce page\n");
+                               return -ENOMEM;
+                       }
+               }
+
+               bv.bv_page = con->bounce_page;
+               bv.bv_offset = 0;
+       }
+       set_in_bvec(con, &bv);
+       /*
+        * NOTE(review): the whole extent length is subtracted here, while the
+        * bvec continuation path above subtracts each later piece again --
+        * confirm the data_len_remain accounting for multi-bvec extents.
+        */
+       con->v2.data_len_remain -= ret;
+       return ret;
+}
+
+/*
+ * Begin a sparse read of the data section of con->in_msg.
+ *
+ * Returns -EOPNOTSUPP (with a one-time warning) if the connection has no
+ * ->sparse_read op.  Otherwise seeds in_data_crc with -1 on non-secure
+ * connections, clears the queued kvecs, records data_len(msg) in
+ * data_len_remain, switches to IN_S_PREPARE_SPARSE_DATA_CONT and returns
+ * whatever prepare_sparse_read_cont() returns.
+ */
+static int prepare_sparse_read_data(struct ceph_connection *con)
+{
+       struct ceph_msg *msg = con->in_msg;
+
+       dout("%s: starting sparse read\n", __func__);
+
+       if (WARN_ON_ONCE(!con->ops->sparse_read))
+               return -EOPNOTSUPP;
+
+       if (!con_secure(con))
+               con->in_data_crc = -1;
+
+       reset_in_kvecs(con);
+       con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
+       con->v2.data_len_remain = data_len(msg);
+       return prepare_sparse_read_cont(con);
+}
+
 static int prepare_read_tail_plain(struct ceph_connection *con)
 {
        struct ceph_msg *msg = con->in_msg;
@@ -1845,7 +2064,10 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
        }
 
        if (data_len(msg)) {
-               con->v2.in_state = IN_S_PREPARE_READ_DATA;
+               if (msg->sparse_read)
+                       con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
+               else
+                       con->v2.in_state = IN_S_PREPARE_READ_DATA;
        } else {
                add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
                con->v2.in_state = IN_S_HANDLE_EPILOGUE;
@@ -2898,6 +3120,12 @@ static int populate_in_iter(struct ceph_connection *con)
                        prepare_read_enc_page(con);
                        ret = 0;
                        break;
+               case IN_S_PREPARE_SPARSE_DATA:
+                       ret = prepare_sparse_read_data(con);
+                       break;
+               case IN_S_PREPARE_SPARSE_DATA_CONT:
+                       ret = prepare_sparse_read_cont(con);
+                       break;
                case IN_S_HANDLE_EPILOGUE:
                        ret = handle_epilogue(con);
                        break;
@@ -3489,6 +3717,23 @@ static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
        con->v2.in_state = IN_S_FINISH_SKIP;
 }
 
+/*
+ * Revoke handler for IN_S_PREPARE_SPARSE_DATA_CONT.
+ *
+ * Warns if the connection is secure, the message has no data, or the current
+ * iterator is not a bvec.  Drops what is left of the current in_iter and
+ * calls set_in_skip() for that many bytes plus data_len_remain plus the
+ * plain epilogue, then moves to IN_S_FINISH_SKIP.
+ */
+static void revoke_at_prepare_sparse_data(struct ceph_connection *con)
+{
+       int resid;  /* current piece of data */
+       int remaining;
+
+       WARN_ON(con_secure(con));
+       WARN_ON(!data_len(con->in_msg));
+       WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+       resid = iov_iter_count(&con->v2.in_iter);
+       dout("%s con %p resid %d\n", __func__, con, resid);
+
+       /* data not yet queued for receive, followed by the epilogue */
+       remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain;
+       con->v2.in_iter.count -= resid;
+       set_in_skip(con, resid + remaining);
+       con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
 static void revoke_at_handle_epilogue(struct ceph_connection *con)
 {
        int resid;
@@ -3505,6 +3750,7 @@ static void revoke_at_handle_epilogue(struct ceph_connection *con)
 void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
 {
        switch (con->v2.in_state) {
+       case IN_S_PREPARE_SPARSE_DATA:
        case IN_S_PREPARE_READ_DATA:
                revoke_at_prepare_read_data(con);
                break;
@@ -3514,6 +3760,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
        case IN_S_PREPARE_READ_ENC_PAGE:
                revoke_at_prepare_read_enc_page(con);
                break;
+       case IN_S_PREPARE_SPARSE_DATA_CONT:
+               revoke_at_prepare_sparse_data(con);
+               break;
        case IN_S_HANDLE_EPILOGUE:
                revoke_at_handle_epilogue(con);
                break;
index 658a6f2..d3a759e 100644 (file)
@@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
        osd_data->num_bvecs = num_bvecs;
 }
 
+/*
+ * Mark @osd_data as CEPH_OSD_DATA_TYPE_ITER and store a by-value copy of
+ * @iter in it.
+ */
+static void ceph_osd_iter_init(struct ceph_osd_data *osd_data,
+                              struct iov_iter *iter)
+{
+       osd_data->type = CEPH_OSD_DATA_TYPE_ITER;
+       osd_data->iter = *iter;
+}
+
 static struct ceph_osd_data *
 osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
 {
@@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
 }
 EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
 
+/**
+ * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer
+ * @osd_req: The request to set up
+ * @which: Index of the operation in which to set the iter
+ * @iter: The buffer iterator
+ *
+ * The iterator is copied by value into the operation's extent osd_data, so
+ * later changes to @iter are not seen by the request.
+ */
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+                               unsigned int which, struct iov_iter *iter)
+{
+       struct ceph_osd_data *osd_data;
+
+       osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+       ceph_osd_iter_init(osd_data, iter);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_iter);
+
 static void osd_req_op_cls_request_info_pagelist(
                        struct ceph_osd_request *osd_req,
                        unsigned int which, struct ceph_pagelist *pagelist)
@@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
 #endif /* CONFIG_BLOCK */
        case CEPH_OSD_DATA_TYPE_BVECS:
                return osd_data->bvec_pos.iter.bi_size;
+       case CEPH_OSD_DATA_TYPE_ITER:
+               return iov_iter_count(&osd_data->iter);
        default:
                WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
                return 0;
@@ -376,8 +401,10 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
 
        switch (op->op) {
        case CEPH_OSD_OP_READ:
+       case CEPH_OSD_OP_SPARSE_READ:
        case CEPH_OSD_OP_WRITE:
        case CEPH_OSD_OP_WRITEFULL:
+               kfree(op->extent.sparse_ext);
                ceph_osd_data_release(&op->extent.osd_data);
                break;
        case CEPH_OSD_OP_CALL:
@@ -669,6 +696,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
                /* reply */
                case CEPH_OSD_OP_STAT:
                case CEPH_OSD_OP_READ:
+               case CEPH_OSD_OP_SPARSE_READ:
                case CEPH_OSD_OP_LIST_WATCHERS:
                        *num_reply_data_items += 1;
                        break;
@@ -738,7 +766,7 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
 
        BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
               opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO &&
-              opcode != CEPH_OSD_OP_TRUNCATE);
+              opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ);
 
        op->extent.offset = offset;
        op->extent.length = length;
@@ -951,6 +979,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
 #endif
        } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
                ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
+       } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) {
+               ceph_msg_data_add_iter(msg, &osd_data->iter);
        } else {
                BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
        }
@@ -963,6 +993,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
        case CEPH_OSD_OP_STAT:
                break;
        case CEPH_OSD_OP_READ:
+       case CEPH_OSD_OP_SPARSE_READ:
        case CEPH_OSD_OP_WRITE:
        case CEPH_OSD_OP_WRITEFULL:
        case CEPH_OSD_OP_ZERO:
@@ -1017,6 +1048,10 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
                dst->copy_from.src_fadvise_flags =
                        cpu_to_le32(src->copy_from.src_fadvise_flags);
                break;
+       case CEPH_OSD_OP_ASSERT_VER:
+               dst->assert_ver.unused = cpu_to_le64(0);
+               dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver);
+               break;
        default:
                pr_err("unsupported osd opcode %s\n",
                        ceph_osd_op_name(src->op));
@@ -1059,7 +1094,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 
        BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
               opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
-              opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
+              opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE &&
+              opcode != CEPH_OSD_OP_SPARSE_READ);
 
        req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
                                        GFP_NOFS);
@@ -1100,15 +1136,30 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
        if (flags & CEPH_OSD_FLAG_WRITE)
                req->r_data_offset = off;
 
-       if (num_ops > 1)
+       if (num_ops > 1) {
+               int num_req_ops, num_rep_ops;
+
                /*
-                * This is a special case for ceph_writepages_start(), but it
-                * also covers ceph_uninline_data().  If more multi-op request
-                * use cases emerge, we will need a separate helper.
+                * If this is a multi-op write request, assume that we'll need
+                * request ops. If it's a multi-op read then assume we'll need
+                * reply ops. Anything else and call it -EINVAL.
                 */
-               r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
-       else
+               if (flags & CEPH_OSD_FLAG_WRITE) {
+                       num_req_ops = num_ops;
+                       num_rep_ops = 0;
+               } else if (flags & CEPH_OSD_FLAG_READ) {
+                       num_req_ops = 0;
+                       num_rep_ops = num_ops;
+               } else {
+                       r = -EINVAL;
+                       goto fail;
+               }
+
+               r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops,
+                                              num_rep_ops);
+       } else {
                r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+       }
        if (r)
                goto fail;
 
@@ -1120,6 +1171,18 @@ fail:
 }
 EXPORT_SYMBOL(ceph_osdc_new_request);
 
+/**
+ * __ceph_alloc_sparse_ext_map - allocate the sparse extent array for an op
+ * @op: extent operation that will own the array
+ * @cnt: number of extent slots to allocate
+ *
+ * The slot count is recorded only once the allocation has succeeded, so a
+ * failed call leaves @op with a NULL array and a count of zero rather than
+ * a count that describes memory which does not exist.
+ *
+ * Return: 0 on success, -ENOMEM if the array could not be allocated.
+ */
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+{
+       op->extent.sparse_ext = kmalloc_array(cnt,
+                                             sizeof(*op->extent.sparse_ext),
+                                             GFP_NOFS);
+       if (!op->extent.sparse_ext) {
+               /* keep count and pointer consistent on failure */
+               op->extent.sparse_ext_cnt = 0;
+               return -ENOMEM;
+       }
+
+       op->extent.sparse_ext_cnt = cnt;
+       return 0;
+}
+EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map);
+
 /*
  * We keep osd requests in an rbtree, sorted by ->r_tid.
  */
@@ -1177,6 +1240,7 @@ static void osd_init(struct ceph_osd *osd)
 {
        refcount_set(&osd->o_ref, 1);
        RB_CLEAR_NODE(&osd->o_node);
+       spin_lock_init(&osd->o_requests_lock);
        osd->o_requests = RB_ROOT;
        osd->o_linger_requests = RB_ROOT;
        osd->o_backoff_mappings = RB_ROOT;
@@ -1187,6 +1251,13 @@ static void osd_init(struct ceph_osd *osd)
        mutex_init(&osd->lock);
 }
 
+/*
+ * Reset @sr to its initial state: free any extent array it holds, zero the
+ * whole structure and set the state to CEPH_SPARSE_READ_HDR.
+ *
+ * sr->sr_extent is passed to kfree() before the memset, so on entry it must
+ * be either NULL or a pointer that is valid to free.
+ */
+static void ceph_init_sparse_read(struct ceph_sparse_read *sr)
+{
+       kfree(sr->sr_extent);
+       memset(sr, '\0', sizeof(*sr));
+       sr->sr_state = CEPH_SPARSE_READ_HDR;
+}
+
 static void osd_cleanup(struct ceph_osd *osd)
 {
        WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
@@ -1197,6 +1268,8 @@ static void osd_cleanup(struct ceph_osd *osd)
        WARN_ON(!list_empty(&osd->o_osd_lru));
        WARN_ON(!list_empty(&osd->o_keepalive_item));
 
+       ceph_init_sparse_read(&osd->o_sparse_read);
+
        if (osd->o_auth.authorizer) {
                WARN_ON(osd_homeless(osd));
                ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
@@ -1216,6 +1289,9 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
        osd_init(osd);
        osd->o_osdc = osdc;
        osd->o_osd = onum;
+       osd->o_sparse_op_idx = -1;
+
+       ceph_init_sparse_read(&osd->o_sparse_read);
 
        ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
 
@@ -1406,7 +1482,9 @@ static void link_request(struct ceph_osd *osd, struct ceph_osd_request *req)
                atomic_inc(&osd->o_osdc->num_homeless);
 
        get_osd(osd);
+       spin_lock(&osd->o_requests_lock);
        insert_request(&osd->o_requests, req);
+       spin_unlock(&osd->o_requests_lock);
        req->r_osd = osd;
 }
 
@@ -1418,7 +1496,9 @@ static void unlink_request(struct ceph_osd *osd, struct ceph_osd_request *req)
             req, req->r_tid);
 
        req->r_osd = NULL;
+       spin_lock(&osd->o_requests_lock);
        erase_request(&osd->o_requests, req);
+       spin_unlock(&osd->o_requests_lock);
        put_osd(osd);
 
        if (!osd_homeless(osd))
@@ -2016,6 +2096,7 @@ static void setup_request_data(struct ceph_osd_request *req)
                                               &op->raw_data_in);
                        break;
                case CEPH_OSD_OP_READ:
+               case CEPH_OSD_OP_SPARSE_READ:
                        ceph_osdc_msg_data_add(reply_msg,
                                               &op->extent.osd_data);
                        break;
@@ -2435,8 +2516,10 @@ static void finish_request(struct ceph_osd_request *req)
 
        req->r_end_latency = ktime_get();
 
-       if (req->r_osd)
+       if (req->r_osd) {
+               ceph_init_sparse_read(&req->r_osd->o_sparse_read);
                unlink_request(req->r_osd, req);
+       }
        atomic_dec(&osdc->num_requests);
 
        /*
@@ -3795,6 +3878,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
         * one (type of) reply back.
         */
        WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
+       req->r_version = m.user_version;
        req->r_result = m.result ?: data_len;
        finish_request(req);
        mutex_unlock(&osd->lock);
@@ -5348,6 +5432,24 @@ static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg)
        ceph_msg_put(msg);
 }
 
+/*
+ * How much sparse data was requested?
+ *
+ * Returns the sum of extent.length over all CEPH_OSD_OP_SPARSE_READ ops in
+ * @req, or 0 if the request does not have CEPH_OSD_FLAG_READ set or has no
+ * such ops.
+ */
+static u64 sparse_data_requested(struct ceph_osd_request *req)
+{
+       u64 len = 0;
+
+       if (req->r_flags & CEPH_OSD_FLAG_READ) {
+               int i;
+
+               for (i = 0; i < req->r_num_ops; ++i) {
+                       struct ceph_osd_req_op *op = &req->r_ops[i];
+
+                       if (op->op == CEPH_OSD_OP_SPARSE_READ)
+                               len += op->extent.length;
+               }
+       }
+       return len;
+}
+
 /*
  * Lookup and return message for incoming reply.  Don't try to do
  * anything about a larger than preallocated data portion of the
@@ -5364,6 +5466,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
        int front_len = le32_to_cpu(hdr->front_len);
        int data_len = le32_to_cpu(hdr->data_len);
        u64 tid = le64_to_cpu(hdr->tid);
+       u64 srlen;
 
        down_read(&osdc->lock);
        if (!osd_registered(osd)) {
@@ -5396,7 +5499,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
                req->r_reply = m;
        }
 
-       if (data_len > req->r_reply->data_length) {
+       srlen = sparse_data_requested(req);
+       if (!srlen && data_len > req->r_reply->data_length) {
                pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
                        __func__, osd->o_osd, req->r_tid, data_len,
                        req->r_reply->data_length);
@@ -5406,6 +5510,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
        }
 
        m = ceph_msg_get(req->r_reply);
+       m->sparse_read = (bool)srlen;
+
        dout("get_reply tid %lld %p\n", tid, m);
 
 out_unlock_session:
@@ -5638,9 +5744,217 @@ static int osd_check_message_signature(struct ceph_msg *msg)
        return ceph_auth_check_message_signature(auth, msg);
 }
 
+/*
+ * Move @cursor forward by @len bytes, one ceph_msg_data_next() piece at a
+ * time.  When @zero is true each skipped range is zero-filled with
+ * zero_user_segment() before the cursor moves past it.
+ */
+static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len,
+                          bool zero)
+{
+       while (len) {
+               struct page *page;
+               size_t poff, plen;
+
+               page = ceph_msg_data_next(cursor, &poff, &plen);
+               /* never step past the requested length */
+               if (plen > len)
+                       plen = len;
+               if (zero)
+                       zero_user_segment(page, poff, poff + plen);
+               len -= plen;
+               ceph_msg_data_advance(cursor, plen);
+       }
+}
+
+/*
+ * Finish the sparse read op that was in progress (if any) and set up the
+ * next CEPH_OSD_OP_SPARSE_READ op of the request matching the incoming
+ * message's tid.
+ *
+ * When no op is in progress (o_sparse_op_idx < 0) the data cursor is
+ * initialised for the total sparse length requested.  Otherwise the extent
+ * array borrowed from the op is handed back to it and the cursor is advanced
+ * over the part of the requested range past the end of the extent map.
+ *
+ * Returns 1 if another sparse read op was found and set up in
+ * o->o_sparse_read, 0 if the request has no further sparse read ops (the op
+ * index is reset to -1), or -EBADR if no request with that tid is linked to
+ * this OSD.  o_requests_lock is held from the lookup until just before
+ * returning.
+ */
+static int prep_next_sparse_read(struct ceph_connection *con,
+                                struct ceph_msg_data_cursor *cursor)
+{
+       struct ceph_osd *o = con->private;
+       struct ceph_sparse_read *sr = &o->o_sparse_read;
+       struct ceph_osd_request *req;
+       struct ceph_osd_req_op *op;
+
+       spin_lock(&o->o_requests_lock);
+       req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid));
+       if (!req) {
+               spin_unlock(&o->o_requests_lock);
+               return -EBADR;
+       }
+
+       if (o->o_sparse_op_idx < 0) {
+               u64 srlen = sparse_data_requested(req);
+
+               dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
+                    __func__, o->o_osd, srlen);
+               ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+       } else {
+               u64 end;
+
+               op = &req->r_ops[o->o_sparse_op_idx];
+
+               WARN_ON_ONCE(op->extent.sparse_ext);
+
+               /* hand back buffer we took earlier */
+               op->extent.sparse_ext = sr->sr_extent;
+               sr->sr_extent = NULL;
+               op->extent.sparse_ext_cnt = sr->sr_count;
+               sr->sr_ext_len = 0;
+               dout("%s: [%d] completed extent array len %d cursor->resid %zd\n",
+                    __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid);
+               /* Advance to end of data for this operation */
+               end = ceph_sparse_ext_map_end(op);
+               if (end < sr->sr_req_len)
+                       advance_cursor(cursor, sr->sr_req_len - end, false);
+       }
+
+       /* sr_extent is NULL here in the hand-back path, so nothing is freed twice */
+       ceph_init_sparse_read(sr);
+
+       /* find next op in this request (if any) */
+       while (++o->o_sparse_op_idx < req->r_num_ops) {
+               op = &req->r_ops[o->o_sparse_op_idx];
+               if (op->op == CEPH_OSD_OP_SPARSE_READ)
+                       goto found;
+       }
+
+       /* reset for next sparse read request */
+       spin_unlock(&o->o_requests_lock);
+       o->o_sparse_op_idx = -1;
+       return 0;
+found:
+       sr->sr_req_off = op->extent.offset;
+       sr->sr_req_len = op->extent.length;
+       sr->sr_pos = sr->sr_req_off;
+       dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__,
+            o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len);
+
+       /* hand off request's sparse extent map buffer */
+       sr->sr_ext_len = op->extent.sparse_ext_cnt;
+       op->extent.sparse_ext_cnt = 0;
+       sr->sr_extent = op->extent.sparse_ext;
+       op->extent.sparse_ext = NULL;
+
+       spin_unlock(&o->o_requests_lock);
+       return 1;
+}
+
+/*
+ * Convert the first sr_count entries of sr->sr_extent in place from
+ * little-endian to host byte order.  Only big-endian builds do any work;
+ * elsewhere this is an empty inline.
+ */
+#ifdef __BIG_ENDIAN
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+       int i;
+
+       for (i = 0; i < sr->sr_count; i++) {
+               struct ceph_sparse_extent *ext = &sr->sr_extent[i];
+
+               ext->off = le64_to_cpu((__force __le64)ext->off);
+               ext->len = le64_to_cpu((__force __le64)ext->len);
+       }
+}
+#else
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+}
+#endif
+
+#define MAX_EXTENTS 4096
+
+/*
+ * ->sparse_read connection op: step the per-OSD sparse read state machine
+ * and tell the messenger what to receive next.
+ *
+ * States, in order:
+ *  CEPH_SPARSE_READ_HDR      - set up the next sparse read op via
+ *                              prep_next_sparse_read(); ask for the 32-bit
+ *                              extent count.
+ *  CEPH_SPARSE_READ_EXTENTS  - convert the count to host order, make sure
+ *                              the extent array is big enough and ask for
+ *                              the extent array; with zero extents go
+ *                              straight to the data length.
+ *  CEPH_SPARSE_READ_DATA_LEN - byte-swap the extent array if needed and ask
+ *                              for sr_datalen.
+ *  CEPH_SPARSE_READ_DATA     - for each extent, zero the gap before it and
+ *                              ask for the extent's data; after the last
+ *                              extent go back to the HDR step.
+ *
+ * On a positive return, *pbuf points at the buffer to fill with that many
+ * bytes, or is NULL when the bytes are extent data to be received through
+ * @cursor (cursor->sr_resid is set to the extent length).  Returns 0 when
+ * prep_next_sparse_read() finds no further op, or a negative errno
+ * (-EREMOTEIO for an excessive extent count or length, -ENOMEM, or an error
+ * from prep_next_sparse_read()).
+ *
+ * NOTE(review): a zero-length extent makes the DATA step return 0, which
+ * prepare_sparse_read_cont() treats as end of sparse data -- confirm that
+ * OSDs never send one.
+ * NOTE(review): ret is not assigned if sr_state matches none of the cases;
+ * presumably unreachable, there is no default label.
+ */
+static int osd_sparse_read(struct ceph_connection *con,
+                          struct ceph_msg_data_cursor *cursor,
+                          char **pbuf)
+{
+       struct ceph_osd *o = con->private;
+       struct ceph_sparse_read *sr = &o->o_sparse_read;
+       u32 count = sr->sr_count;
+       u64 eoff, elen;
+       int ret;
+
+       switch (sr->sr_state) {
+       case CEPH_SPARSE_READ_HDR:
+next_op:
+               ret = prep_next_sparse_read(con, cursor);
+               if (ret <= 0)
+                       return ret;
+
+               /* number of extents */
+               ret = sizeof(sr->sr_count);
+               *pbuf = (char *)&sr->sr_count;
+               sr->sr_state = CEPH_SPARSE_READ_EXTENTS;
+               break;
+       case CEPH_SPARSE_READ_EXTENTS:
+               /* Convert sr_count to host-endian */
+               count = le32_to_cpu((__force __le32)sr->sr_count);
+               sr->sr_count = count;
+               dout("[%d] got %u extents\n", o->o_osd, count);
+
+               if (count > 0) {
+                       if (!sr->sr_extent || count > sr->sr_ext_len) {
+                               /*
+                                * Apply a hard cap to the number of extents.
+                                * If we have more, assume something is wrong.
+                                *
+                                * The cap is only checked on this path, i.e.
+                                * when the existing array is missing or too
+                                * small.
+                                */
+                               if (count > MAX_EXTENTS) {
+                                       dout("%s: OSD returned 0x%x extents in a single reply!\n",
+                                            __func__, count);
+                                       return -EREMOTEIO;
+                               }
+
+                               /* no extent array provided, or too short */
+                               kfree(sr->sr_extent);
+                               sr->sr_extent = kmalloc_array(count,
+                                                             sizeof(*sr->sr_extent),
+                                                             GFP_NOIO);
+                               if (!sr->sr_extent)
+                                       return -ENOMEM;
+                               sr->sr_ext_len = count;
+                       }
+                       ret = count * sizeof(*sr->sr_extent);
+                       *pbuf = (char *)sr->sr_extent;
+                       sr->sr_state = CEPH_SPARSE_READ_DATA_LEN;
+                       break;
+               }
+               /* No extents? Read data len */
+               fallthrough;
+       case CEPH_SPARSE_READ_DATA_LEN:
+               convert_extent_map(sr);
+               ret = sizeof(sr->sr_datalen);
+               *pbuf = (char *)&sr->sr_datalen;
+               sr->sr_state = CEPH_SPARSE_READ_DATA;
+               break;
+       case CEPH_SPARSE_READ_DATA:
+               /* all extents of this op consumed: move on to the next op */
+               if (sr->sr_index >= count) {
+                       sr->sr_state = CEPH_SPARSE_READ_HDR;
+                       goto next_op;
+               }
+
+               eoff = sr->sr_extent[sr->sr_index].off;
+               elen = sr->sr_extent[sr->sr_index].len;
+
+               dout("[%d] ext %d off 0x%llx len 0x%llx\n",
+                    o->o_osd, sr->sr_index, eoff, elen);
+
+               /* the length is returned through an int below */
+               if (elen > INT_MAX) {
+                       dout("Sparse read extent length too long (0x%llx)\n",
+                            elen);
+                       return -EREMOTEIO;
+               }
+
+               /* zero out anything from sr_pos to start of extent */
+               if (sr->sr_pos < eoff)
+                       advance_cursor(cursor, eoff - sr->sr_pos, true);
+
+               /* Set position to end of extent */
+               sr->sr_pos = eoff + elen;
+
+               /* send back the new length and nullify the ptr */
+               cursor->sr_resid = elen;
+               ret = elen;
+               *pbuf = NULL;
+
+               /* Bump the array index */
+               ++sr->sr_index;
+               break;
+       }
+       return ret;
+}
+
 static const struct ceph_connection_operations osd_con_ops = {
        .get = osd_get_con,
        .put = osd_put_con,
+       .sparse_read = osd_sparse_read,
        .alloc_msg = osd_alloc_msg,
        .dispatch = osd_dispatch,
        .fault = osd_fault,
index 89d15ce..b3b3af0 100644 (file)
@@ -1831,8 +1831,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
 
        memset(&keys, 0, sizeof(keys));
        __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
-                          &keys, NULL, 0, 0, 0,
-                          FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+                          &keys, NULL, 0, 0, 0, 0);
 
        return __flow_hash_from_keys(&keys, &hashrnd);
 }
index 4570705..4eaf7ed 100644 (file)
@@ -550,7 +550,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
                             bool *pfmemalloc)
 {
        bool ret_pfmemalloc = false;
-       unsigned int obj_size;
+       size_t obj_size;
        void *obj;
 
        obj_size = SKB_HEAD_ALIGN(*size);
@@ -567,7 +567,13 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
                obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
                goto out;
        }
-       *size = obj_size = kmalloc_size_roundup(obj_size);
+
+       obj_size = kmalloc_size_roundup(obj_size);
+       /* The following cast might truncate high-order bits of obj_size, this
+        * is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
+        */
+       *size = (unsigned int)obj_size;
+
        /*
         * Try a regular allocation, when that fails and we're not entitled
         * to the reserves, fail.
@@ -4423,21 +4429,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
        struct sk_buff *segs = NULL;
        struct sk_buff *tail = NULL;
        struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
-       skb_frag_t *frag = skb_shinfo(head_skb)->frags;
        unsigned int mss = skb_shinfo(head_skb)->gso_size;
        unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
-       struct sk_buff *frag_skb = head_skb;
        unsigned int offset = doffset;
        unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
        unsigned int partial_segs = 0;
        unsigned int headroom;
        unsigned int len = head_skb->len;
+       struct sk_buff *frag_skb;
+       skb_frag_t *frag;
        __be16 proto;
        bool csum, sg;
-       int nfrags = skb_shinfo(head_skb)->nr_frags;
        int err = -ENOMEM;
        int i = 0;
-       int pos;
+       int nfrags, pos;
 
        if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
            mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
@@ -4514,6 +4519,13 @@ normal:
        headroom = skb_headroom(head_skb);
        pos = skb_headlen(head_skb);
 
+       if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+               return ERR_PTR(-ENOMEM);
+
+       nfrags = skb_shinfo(head_skb)->nr_frags;
+       frag = skb_shinfo(head_skb)->frags;
+       frag_skb = head_skb;
+
        do {
                struct sk_buff *nskb;
                skb_frag_t *nskb_frag;
@@ -4534,6 +4546,10 @@ normal:
                    (skb_headlen(list_skb) == len || sg)) {
                        BUG_ON(skb_headlen(list_skb) > len);
 
+                       nskb = skb_clone(list_skb, GFP_ATOMIC);
+                       if (unlikely(!nskb))
+                               goto err;
+
                        i = 0;
                        nfrags = skb_shinfo(list_skb)->nr_frags;
                        frag = skb_shinfo(list_skb)->frags;
@@ -4552,12 +4568,8 @@ normal:
                                frag++;
                        }
 
-                       nskb = skb_clone(list_skb, GFP_ATOMIC);
                        list_skb = list_skb->next;
 
-                       if (unlikely(!nskb))
-                               goto err;
-
                        if (unlikely(pskb_trim(nskb, len))) {
                                kfree_skb(nskb);
                                goto err;
@@ -4633,12 +4645,16 @@ normal:
                skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
                                           SKBFL_SHARED_FRAG;
 
-               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-                   skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+               if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
                        goto err;
 
                while (pos < offset + len) {
                        if (i >= nfrags) {
+                               if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+                                   skb_zerocopy_clone(nskb, list_skb,
+                                                      GFP_ATOMIC))
+                                       goto err;
+
                                i = 0;
                                nfrags = skb_shinfo(list_skb)->nr_frags;
                                frag = skb_shinfo(list_skb)->frags;
@@ -4652,10 +4668,6 @@ normal:
                                        i--;
                                        frag--;
                                }
-                               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-                                   skb_zerocopy_clone(nskb, frag_skb,
-                                                      GFP_ATOMIC))
-                                       goto err;
 
                                list_skb = list_skb->next;
                        }
@@ -5207,7 +5219,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
        serr->ee.ee_info = tstype;
        serr->opt_stats = opt_stats;
        serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
-       if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
+       if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
                serr->ee.ee_data = skb_shinfo(skb)->tskey;
                if (sk_is_tcp(sk))
                        serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
@@ -5263,21 +5275,23 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 {
        struct sk_buff *skb;
        bool tsonly, opt_stats = false;
+       u32 tsflags;
 
        if (!sk)
                return;
 
-       if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+       tsflags = READ_ONCE(sk->sk_tsflags);
+       if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
            skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
                return;
 
-       tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+       tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
        if (!skb_may_tx_timestamp(sk, tsonly))
                return;
 
        if (tsonly) {
 #ifdef CONFIG_INET
-               if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+               if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
                    sk_is_tcp(sk)) {
                        skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
                                                             ack_skb);
index a0659fc..6c31eef 100644 (file)
@@ -612,12 +612,18 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
                               u32 off, u32 len, bool ingress)
 {
+       int err = 0;
+
        if (!ingress) {
                if (!sock_writeable(psock->sk))
                        return -EAGAIN;
                return skb_send_sock(psock->sk, skb, off, len);
        }
-       return sk_psock_skb_ingress(psock, skb, off, len);
+       skb_get(skb);
+       err = sk_psock_skb_ingress(psock, skb, off, len);
+       if (err < 0)
+               kfree_skb(skb);
+       return err;
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -685,9 +691,7 @@ static void sk_psock_backlog(struct work_struct *work)
                } while (len);
 
                skb = skb_dequeue(&psock->ingress_skb);
-               if (!ingress) {
-                       kfree_skb(skb);
-               }
+               kfree_skb(skb);
        }
 end:
        mutex_unlock(&psock->work_mutex);
index 666a17c..16584e2 100644 (file)
@@ -765,7 +765,8 @@ bool sk_mc_loop(struct sock *sk)
                return false;
        if (!sk)
                return true;
-       switch (sk->sk_family) {
+       /* IPV6_ADDRFORM can change sk->sk_family under us. */
+       switch (READ_ONCE(sk->sk_family)) {
        case AF_INET:
                return inet_test_bit(MC_LOOP, sk);
 #if IS_ENABLED(CONFIG_IPV6)
@@ -893,7 +894,7 @@ static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
        if (!match)
                return -EINVAL;
 
-       sk->sk_bind_phc = phc_index;
+       WRITE_ONCE(sk->sk_bind_phc, phc_index);
 
        return 0;
 }
@@ -936,7 +937,7 @@ int sock_set_timestamping(struct sock *sk, int optname,
                        return ret;
        }
 
-       sk->sk_tsflags = val;
+       WRITE_ONCE(sk->sk_tsflags, val);
        sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
 
        if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
@@ -1044,7 +1045,7 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
                mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
                return -ENOMEM;
        }
-       sk->sk_forward_alloc += pages << PAGE_SHIFT;
+       sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
 
        WRITE_ONCE(sk->sk_reserved_mem,
                   sk->sk_reserved_mem + (pages << PAGE_SHIFT));
@@ -1718,8 +1719,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
 
        case SO_TIMESTAMPING_OLD:
                lv = sizeof(v.timestamping);
-               v.timestamping.flags = sk->sk_tsflags;
-               v.timestamping.bind_phc = sk->sk_bind_phc;
+               v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+               v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
                break;
 
        case SO_RCVTIMEO_OLD:
@@ -2746,9 +2747,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
                        break;
-               if (sk->sk_shutdown & SEND_SHUTDOWN)
+               if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
                        break;
-               if (sk->sk_err)
+               if (READ_ONCE(sk->sk_err))
                        break;
                timeo = schedule_timeout(timeo);
        }
@@ -2776,7 +2777,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
                        goto failure;
 
                err = -EPIPE;
-               if (sk->sk_shutdown & SEND_SHUTDOWN)
+               if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
                        goto failure;
 
                if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
@@ -3138,10 +3139,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 {
        int ret, amt = sk_mem_pages(size);
 
-       sk->sk_forward_alloc += amt << PAGE_SHIFT;
+       sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
        ret = __sk_mem_raise_allocated(sk, size, amt, kind);
        if (!ret)
-               sk->sk_forward_alloc -= amt << PAGE_SHIFT;
+               sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
        return ret;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -3173,7 +3174,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
 void __sk_mem_reclaim(struct sock *sk, int amount)
 {
        amount >>= PAGE_SHIFT;
-       sk->sk_forward_alloc -= amount << PAGE_SHIFT;
+       sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
        __sk_mem_reduce_allocated(sk, amount);
 }
 EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -3742,7 +3743,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
        mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
        mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
        mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
-       mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+       mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
        mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
        mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
        mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
index 8f07fea..cb11750 100644 (file)
@@ -18,7 +18,7 @@ struct bpf_stab {
        struct bpf_map map;
        struct sock **sks;
        struct sk_psock_progs progs;
-       raw_spinlock_t lock;
+       spinlock_t lock;
 };
 
 #define SOCK_CREATE_FLAG_MASK                          \
@@ -44,7 +44,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
                return ERR_PTR(-ENOMEM);
 
        bpf_map_init_from_attr(&stab->map, attr);
-       raw_spin_lock_init(&stab->lock);
+       spin_lock_init(&stab->lock);
 
        stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
                                       sizeof(struct sock *),
@@ -411,7 +411,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
        struct sock *sk;
        int err = 0;
 
-       raw_spin_lock_bh(&stab->lock);
+       spin_lock_bh(&stab->lock);
        sk = *psk;
        if (!sk_test || sk_test == sk)
                sk = xchg(psk, NULL);
@@ -421,7 +421,7 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
        else
                err = -EINVAL;
 
-       raw_spin_unlock_bh(&stab->lock);
+       spin_unlock_bh(&stab->lock);
        return err;
 }
 
@@ -487,7 +487,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
        psock = sk_psock(sk);
        WARN_ON_ONCE(!psock);
 
-       raw_spin_lock_bh(&stab->lock);
+       spin_lock_bh(&stab->lock);
        osk = stab->sks[idx];
        if (osk && flags == BPF_NOEXIST) {
                ret = -EEXIST;
@@ -501,10 +501,10 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
        stab->sks[idx] = sk;
        if (osk)
                sock_map_unref(osk, &stab->sks[idx]);
-       raw_spin_unlock_bh(&stab->lock);
+       spin_unlock_bh(&stab->lock);
        return 0;
 out_unlock:
-       raw_spin_unlock_bh(&stab->lock);
+       spin_unlock_bh(&stab->lock);
        if (psock)
                sk_psock_put(sk, psock);
 out_free:
@@ -835,7 +835,7 @@ struct bpf_shtab_elem {
 
 struct bpf_shtab_bucket {
        struct hlist_head head;
-       raw_spinlock_t lock;
+       spinlock_t lock;
 };
 
 struct bpf_shtab {
@@ -910,7 +910,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
         * is okay since it's going away only after RCU grace period.
         * However, we need to check whether it's still present.
         */
-       raw_spin_lock_bh(&bucket->lock);
+       spin_lock_bh(&bucket->lock);
        elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
                                               elem->key, map->key_size);
        if (elem_probe && elem_probe == elem) {
@@ -918,7 +918,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
                sock_map_unref(elem->sk, elem);
                sock_hash_free_elem(htab, elem);
        }
-       raw_spin_unlock_bh(&bucket->lock);
+       spin_unlock_bh(&bucket->lock);
 }
 
 static long sock_hash_delete_elem(struct bpf_map *map, void *key)
@@ -932,7 +932,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
        hash = sock_hash_bucket_hash(key, key_size);
        bucket = sock_hash_select_bucket(htab, hash);
 
-       raw_spin_lock_bh(&bucket->lock);
+       spin_lock_bh(&bucket->lock);
        elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
        if (elem) {
                hlist_del_rcu(&elem->node);
@@ -940,7 +940,7 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
                sock_hash_free_elem(htab, elem);
                ret = 0;
        }
-       raw_spin_unlock_bh(&bucket->lock);
+       spin_unlock_bh(&bucket->lock);
        return ret;
 }
 
@@ -1000,7 +1000,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
        hash = sock_hash_bucket_hash(key, key_size);
        bucket = sock_hash_select_bucket(htab, hash);
 
-       raw_spin_lock_bh(&bucket->lock);
+       spin_lock_bh(&bucket->lock);
        elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
        if (elem && flags == BPF_NOEXIST) {
                ret = -EEXIST;
@@ -1026,10 +1026,10 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
                sock_map_unref(elem->sk, elem);
                sock_hash_free_elem(htab, elem);
        }
-       raw_spin_unlock_bh(&bucket->lock);
+       spin_unlock_bh(&bucket->lock);
        return 0;
 out_unlock:
-       raw_spin_unlock_bh(&bucket->lock);
+       spin_unlock_bh(&bucket->lock);
        sk_psock_put(sk, psock);
 out_free:
        sk_psock_free_link(link);
@@ -1115,7 +1115,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
 
        for (i = 0; i < htab->buckets_num; i++) {
                INIT_HLIST_HEAD(&htab->buckets[i].head);
-               raw_spin_lock_init(&htab->buckets[i].lock);
+               spin_lock_init(&htab->buckets[i].lock);
        }
 
        return &htab->map;
@@ -1147,11 +1147,11 @@ static void sock_hash_free(struct bpf_map *map)
                 * exists, psock exists and holds a ref to socket. That
                 * lets us to grab a socket ref too.
                 */
-               raw_spin_lock_bh(&bucket->lock);
+               spin_lock_bh(&bucket->lock);
                hlist_for_each_entry(elem, &bucket->head, node)
                        sock_hold(elem->sk);
                hlist_move_list(&bucket->head, &unlink_list);
-               raw_spin_unlock_bh(&bucket->lock);
+               spin_unlock_bh(&bucket->lock);
 
                /* Process removed entries out of atomic context to
                 * block for socket lock before deleting the psock's
index 1086653..d0bc1dd 100644 (file)
@@ -157,26 +157,24 @@ out_status:
 int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
 {
        struct net *net = sock_net(skb->sk);
-       struct handshake_req *req = NULL;
-       struct socket *sock = NULL;
+       struct handshake_req *req;
+       struct socket *sock;
        int fd, status, err;
 
        if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
                return -EINVAL;
        fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
 
-       err = 0;
        sock = sockfd_lookup(fd, &err);
-       if (err) {
-               err = -EBADF;
-               goto out_status;
-       }
+       if (!sock)
+               return err;
 
        req = handshake_req_hash_lookup(sock->sk);
        if (!req) {
                err = -EBUSY;
+               trace_handshake_cmd_done_err(net, req, sock->sk, err);
                fput(sock->file);
-               goto out_status;
+               return err;
        }
 
        trace_handshake_cmd_done(net, req, sock->sk, fd);
@@ -188,10 +186,6 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
        handshake_complete(req, status, info);
        fput(sock->file);
        return 0;
-
-out_status:
-       trace_handshake_cmd_done_err(net, req, sock->sk, err);
-       return err;
 }
 
 static unsigned int handshake_net_id;
index 65ba18a..eafa4a0 100644 (file)
@@ -278,7 +278,8 @@ void fib_release_info(struct fib_info *fi)
                                hlist_del(&nexthop_nh->nh_hash);
                        } endfor_nexthops(fi)
                }
-               fi->fib_dead = 1;
+               /* Paired with READ_ONCE() from fib_table_lookup() */
+               WRITE_ONCE(fi->fib_dead, 1);
                fib_info_put(fi);
        }
        spin_unlock_bh(&fib_info_lock);
@@ -1581,6 +1582,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 link_it:
        ofi = fib_find_info(fi);
        if (ofi) {
+               /* fib_table_lookup() should not see @fi yet. */
                fi->fib_dead = 1;
                free_fib_info(fi);
                refcount_inc(&ofi->fib_treeref);
@@ -1619,6 +1621,7 @@ err_inval:
 
 failure:
        if (fi) {
+               /* fib_table_lookup() should not see @fi yet. */
                fi->fib_dead = 1;
                free_fib_info(fi);
        }
index 74d403d..d13fb9e 100644 (file)
@@ -1582,7 +1582,8 @@ found:
                if (fa->fa_dscp &&
                    inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
                        continue;
-               if (fi->fib_dead)
+               /* Paired with WRITE_ONCE() in fib_release_info() */
+               if (READ_ONCE(fi->fib_dead))
                        continue;
                if (fa->fa_info->fib_scope < flp->flowi4_scope)
                        continue;
index 0c9e768..418e5fb 100644 (file)
@@ -353,8 +353,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
        struct flowi4 fl4;
        int hlen = LL_RESERVED_SPACE(dev);
        int tlen = dev->needed_tailroom;
-       unsigned int size = mtu;
+       unsigned int size;
 
+       size = min(mtu, IP_MAX_MTU);
        while (1) {
                skb = alloc_skb(size + hlen + tlen,
                                GFP_ATOMIC | __GFP_NOWARN);
index e18931a..66fac12 100644 (file)
@@ -67,7 +67,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
        struct ip_options *opt  = &(IPCB(skb)->opt);
 
        __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
-       __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
 #ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
index fe9ead9..5e9c815 100644 (file)
@@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
 static struct sk_buff *ip_extract_route_hint(const struct net *net,
                                             struct sk_buff *skb, int rt_type)
 {
-       if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+       if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+           IPCB(skb)->flags & IPSKB_MULTIPATH)
                return NULL;
 
        return skb;
index 43ba4b7..4ab877c 100644 (file)
@@ -207,6 +207,9 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
        } else if (rt->rt_type == RTN_BROADCAST)
                IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
 
+       /* OUTOCTETS should be counted after fragment */
+       IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
+
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
                skb = skb_expand_head(skb, hh_len);
                if (!skb)
@@ -366,8 +369,6 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
        /*
         *      If the indicated interface is up and running, send the packet.
         */
-       IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
        skb->dev = dev;
        skb->protocol = htons(ETH_P_IP);
 
@@ -424,8 +425,6 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
 
-       IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
        skb->dev = dev;
        skb->protocol = htons(ETH_P_IP);
 
@@ -982,7 +981,7 @@ static int __ip_append_data(struct sock *sk,
        paged = !!cork->gso_size;
 
        if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
-           sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+           READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
                tskey = atomic_inc_return(&sk->sk_tskey) - 1;
 
        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
index d1c7366..cce9cb2 100644 (file)
@@ -511,7 +511,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
         * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
         */
        info = PKTINFO_SKB_CB(skb);
-       if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+       if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
            !info->ipi_ifindex)
                return false;
 
index 3f0c6d6..9e222a5 100644 (file)
@@ -1804,7 +1804,6 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
        struct ip_options *opt = &(IPCB(skb)->opt);
 
        IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
-       IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
 
        if (unlikely(opt->optlen))
                ip_forward_options(skb);
index d8c99bd..66f419e 100644 (file)
@@ -2144,6 +2144,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
                int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
 
                fib_select_multipath(res, h);
+               IPCB(skb)->flags |= IPSKB_MULTIPATH;
        }
 #endif
 
index b155948..0c3040a 100644 (file)
@@ -2256,14 +2256,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
                        }
                }
 
-               if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+               if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
                        has_timestamping = true;
                else
                        tss->ts[0] = (struct timespec64) {0};
        }
 
        if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
-               if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+               if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
                        has_timestamping = true;
                else
                        tss->ts[2] = (struct timespec64) {0};
index e6b4fbd..ccfc8bb 100644 (file)
@@ -3474,7 +3474,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
        if (delta <= 0)
                return;
        amt = sk_mem_pages(delta);
-       sk->sk_forward_alloc += amt << PAGE_SHIFT;
+       sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
        sk_memory_allocated_add(sk, amt);
 
        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
index 0794a2c..f39b9c8 100644 (file)
@@ -1414,9 +1414,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
                spin_lock(&sk_queue->lock);
 
 
-       sk->sk_forward_alloc += size;
+       sk_forward_alloc_add(sk, size);
        amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
-       sk->sk_forward_alloc -= amt;
+       sk_forward_alloc_add(sk, -amt);
 
        if (amt)
                __sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
@@ -1527,7 +1527,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
                goto uncharge_drop;
        }
 
-       sk->sk_forward_alloc -= size;
+       sk_forward_alloc_add(sk, -size);
 
        /* no need to setup a destructor, we will explicitly release the
         * forward allocated memory on dequeue
index 967913a..0b6ee96 100644 (file)
@@ -1378,7 +1378,7 @@ retry:
         * idev->desync_factor if it's larger
         */
        cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
-       max_desync_factor = min_t(__u32,
+       max_desync_factor = min_t(long,
                                  idev->cnf.max_desync_factor,
                                  cnf_temp_preferred_lft - regen_advance);
 
index d94041b..b837881 100644 (file)
@@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb,
 static struct sk_buff *ip6_extract_route_hint(const struct net *net,
                                              struct sk_buff *skb)
 {
-       if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+       if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
+           IP6CB(skb)->flags & IP6SKB_MULTIPATH)
                return NULL;
 
        return skb;
index 0665e8b..54fc4c7 100644 (file)
@@ -451,7 +451,6 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
        struct dst_entry *dst = skb_dst(skb);
 
        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
-       __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 
 #ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
@@ -1502,7 +1501,7 @@ static int __ip6_append_data(struct sock *sk,
        orig_mtu = mtu;
 
        if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
-           sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+           READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
                tskey = atomic_inc_return(&sk->sk_tskey) - 1;
 
        hh_len = LL_RESERVED_SPACE(rt->dst.dev);
index 67a3b8f..30ca064 100644 (file)
@@ -2010,8 +2010,6 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
 {
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_OUTFORWDATAGRAMS);
-       IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
-                     IPSTATS_MIB_OUTOCTETS, skb->len);
        return dst_output(net, sk, skb);
 }
 
index 1b27728..5831aaa 100644 (file)
@@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                return -EINVAL;
 
        ipcm6_init_sk(&ipc6, np);
-       ipc6.sockc.tsflags = sk->sk_tsflags;
+       ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
        ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
        fl6.flowi6_oif = oif;
index 0eae766..42fcec3 100644 (file)
@@ -772,7 +772,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        fl6.flowi6_uid = sk->sk_uid;
 
        ipcm6_init(&ipc6);
-       ipc6.sockc.tsflags = sk->sk_tsflags;
+       ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
        ipc6.sockc.mark = fl6.flowi6_mark;
 
        if (sin6) {
index d15a9e3..9c687b3 100644 (file)
@@ -423,6 +423,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
        if (match->nh && have_oif_match && res->nh)
                return;
 
+       if (skb)
+               IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
+
        /* We might have already computed the hash for ICMPv6 errors. In such
         * case it will always be non-zero. Otherwise now is the time to do it.
         */
index ebc6ae4..86b5d50 100644 (file)
@@ -1339,7 +1339,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        ipcm6_init(&ipc6);
        ipc6.gso_size = READ_ONCE(up->gso_size);
-       ipc6.sockc.tsflags = sk->sk_tsflags;
+       ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
        ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
        /* destination address check */
index 393f01b..4580f61 100644 (file)
@@ -1859,6 +1859,8 @@ static __net_exit void kcm_exit_net(struct net *net)
         * that all multiplexors and psocks have been destroyed.
         */
        WARN_ON(!list_empty(&knet->mux_list));
+
+       mutex_destroy(&knet->mutex);
 }
 
 static struct pernet_operations kcm_net_ops = {
index 933b257..a7fc16f 100644 (file)
@@ -134,9 +134,15 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
        __kfree_skb(skb);
 }
 
+static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
+{
+       WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
+                  mptcp_sk(sk)->rmem_fwd_alloc + size);
+}
+
 static void mptcp_rmem_charge(struct sock *sk, int size)
 {
-       mptcp_sk(sk)->rmem_fwd_alloc -= size;
+       mptcp_rmem_fwd_alloc_add(sk, -size);
 }
 
 static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
@@ -177,7 +183,7 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
 static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
 {
        amount >>= PAGE_SHIFT;
-       mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
+       mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
        __sk_mem_reduce_allocated(sk, amount);
 }
 
@@ -186,7 +192,7 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
        struct mptcp_sock *msk = mptcp_sk(sk);
        int reclaimable;
 
-       msk->rmem_fwd_alloc += size;
+       mptcp_rmem_fwd_alloc_add(sk, size);
        reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
 
        /* see sk_mem_uncharge() for the rationale behind the following schema */
@@ -341,7 +347,7 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
        if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
                return false;
 
-       msk->rmem_fwd_alloc += amount;
+       mptcp_rmem_fwd_alloc_add(sk, amount);
        return true;
 }
 
@@ -1800,7 +1806,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                }
 
                /* data successfully copied into the write queue */
-               sk->sk_forward_alloc -= total_ts;
+               sk_forward_alloc_add(sk, -total_ts);
                copied += psize;
                dfrag->data_len += psize;
                frag_truesize += psize;
@@ -3257,8 +3263,8 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
        /* move all the rx fwd alloc into the sk_mem_reclaim_final in
         * inet_sock_destruct() will dispose it
         */
-       sk->sk_forward_alloc += msk->rmem_fwd_alloc;
-       msk->rmem_fwd_alloc = 0;
+       sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
+       WRITE_ONCE(msk->rmem_fwd_alloc, 0);
        mptcp_token_destroy(msk);
        mptcp_pm_free_anno_list(msk);
        mptcp_free_local_addr_list(msk);
@@ -3522,7 +3528,8 @@ static void mptcp_shutdown(struct sock *sk, int how)
 
 static int mptcp_forward_alloc_get(const struct sock *sk)
 {
-       return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+       return READ_ONCE(sk->sk_forward_alloc) +
+              READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
 }
 
 static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
index 005a7ce..bf4f91b 100644 (file)
@@ -36,6 +36,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
 #define IP_SET_HASH_WITH_PROTO
 #define IP_SET_HASH_WITH_NETS
 #define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
 
 /* IPv4 variant */
 
index 41b826d..e429ebb 100644 (file)
@@ -102,6 +102,7 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types
        [NFT_MSG_NEWFLOWTABLE]  = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
        [NFT_MSG_GETFLOWTABLE]  = AUDIT_NFT_OP_INVALID,
        [NFT_MSG_DELFLOWTABLE]  = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+       [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET,
 };
 
 static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state)
@@ -3421,6 +3422,18 @@ err:
        nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
 }
 
+static void audit_log_rule_reset(const struct nft_table *table,
+                                unsigned int base_seq,
+                                unsigned int nentries)
+{
+       char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
+                             table->name, base_seq);
+
+       audit_log_nfcfg(buf, table->family, nentries,
+                       AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+       kfree(buf);
+}
+
 struct nft_rule_dump_ctx {
        char *table;
        char *chain;
@@ -3467,6 +3480,10 @@ cont:
 cont_skip:
                (*idx)++;
        }
+
+       if (reset && *idx)
+               audit_log_rule_reset(table, cb->seq, *idx);
+
        return 0;
 }
 
@@ -3634,6 +3651,9 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
        if (err < 0)
                goto err_fill_rule_info;
 
+       if (reset)
+               audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
+
        return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
 
 err_fill_rule_info:
@@ -5624,13 +5644,25 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
        return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
 }
 
+static void audit_log_nft_set_reset(const struct nft_table *table,
+                                   unsigned int base_seq,
+                                   unsigned int nentries)
+{
+       char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
+
+       audit_log_nfcfg(buf, table->family, nentries,
+                       AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC);
+       kfree(buf);
+}
+
 struct nft_set_dump_ctx {
        const struct nft_set    *set;
        struct nft_ctx          ctx;
 };
 
 static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
-                                const struct nft_set *set, bool reset)
+                                const struct nft_set *set, bool reset,
+                                unsigned int base_seq)
 {
        struct nft_set_elem_catchall *catchall;
        u8 genmask = nft_genmask_cur(net);
@@ -5646,6 +5678,8 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
 
                elem.priv = catchall->elem;
                ret = nf_tables_fill_setelem(skb, set, &elem, reset);
+               if (reset && !ret)
+                       audit_log_nft_set_reset(set->table, base_seq, 1);
                break;
        }
 
@@ -5725,12 +5759,17 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
        set->ops->walk(&dump_ctx->ctx, set, &args.iter);
 
        if (!args.iter.err && args.iter.count == cb->args[0])
-               args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
-       rcu_read_unlock();
-
+               args.iter.err = nft_set_catchall_dump(net, skb, set,
+                                                     reset, cb->seq);
        nla_nest_end(skb, nest);
        nlmsg_end(skb, nlh);
 
+       if (reset && args.iter.count > args.iter.skip)
+               audit_log_nft_set_reset(table, cb->seq,
+                                       args.iter.count - args.iter.skip);
+
+       rcu_read_unlock();
+
        if (args.iter.err && args.iter.err != -EMSGSIZE)
                return args.iter.err;
        if (args.iter.count == cb->args[0])
@@ -5955,13 +5994,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
        struct netlink_ext_ack *extack = info->extack;
        u8 genmask = nft_genmask_cur(info->net);
        u8 family = info->nfmsg->nfgen_family;
+       int rem, err = 0, nelems = 0;
        struct net *net = info->net;
        struct nft_table *table;
        struct nft_set *set;
        struct nlattr *attr;
        struct nft_ctx ctx;
        bool reset = false;
-       int rem, err = 0;
 
        table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
                                 genmask, 0);
@@ -6004,8 +6043,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
                        NL_SET_BAD_ATTR(extack, attr);
                        break;
                }
+               nelems++;
        }
 
+       if (reset)
+               audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
+                                       nelems);
+
        return err;
 }
 
index 8f1bfa6..50723ba 100644 (file)
@@ -315,6 +315,14 @@ static int nfnl_osf_add_callback(struct sk_buff *skb,
 
        f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
 
+       if (f->opt_num > ARRAY_SIZE(f->opt))
+               return -EINVAL;
+
+       if (!memchr(f->genre, 0, MAXGENRELEN) ||
+           !memchr(f->subtype, 0, MAXGENRELEN) ||
+           !memchr(f->version, 0, MAXGENRELEN))
+               return -EINVAL;
+
        kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
        if (!kf)
                return -ENOMEM;
index 7f856ce..3fbaa7b 100644 (file)
@@ -35,6 +35,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
                return opt[offset + 1];
 }
 
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+       if (len % NFT_REG32_SIZE)
+               dest[len / NFT_REG32_SIZE] = 0;
+
+       return skb_copy_bits(skb, offset, dest, len);
+}
+
 static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
                                 struct nft_regs *regs,
                                 const struct nft_pktinfo *pkt)
@@ -56,8 +64,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
        }
        offset += priv->offset;
 
-       dest[priv->len / NFT_REG32_SIZE] = 0;
-       if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+       if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
                goto err;
        return;
 err:
@@ -153,8 +160,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
        }
        offset += priv->offset;
 
-       dest[priv->len / NFT_REG32_SIZE] = 0;
-       if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+       if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
                goto err;
        return;
 err:
@@ -210,7 +216,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
                if (priv->flags & NFT_EXTHDR_F_PRESENT) {
                        *dest = 1;
                } else {
-                       dest[priv->len / NFT_REG32_SIZE] = 0;
+                       if (priv->len % NFT_REG32_SIZE)
+                               dest[priv->len / NFT_REG32_SIZE] = 0;
                        memcpy(dest, opt + offset, priv->len);
                }
 
@@ -238,7 +245,12 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
        if (!tcph)
                goto err;
 
+       if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+               goto err;
+
+       tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
        opt = (u8 *)tcph;
+
        for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
                union {
                        __be16 v16;
@@ -253,15 +265,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
                if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
                        goto err;
 
-               if (skb_ensure_writable(pkt->skb,
-                                       nft_thoff(pkt) + i + priv->len))
-                       goto err;
-
-               tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
-                                             &tcphdr_len);
-               if (!tcph)
-                       goto err;
-
                offset = i + priv->offset;
 
                switch (priv->len) {
@@ -325,9 +328,9 @@ static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
        if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
                goto drop;
 
-       opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
-       if (!opt)
-               goto err;
+       tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
+       opt = (u8 *)tcph;
+
        for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
                unsigned int j;
 
@@ -392,9 +395,8 @@ static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
                            offset + ntohs(sch->length) > pkt->skb->len)
                                break;
 
-                       dest[priv->len / NFT_REG32_SIZE] = 0;
-                       if (skb_copy_bits(pkt->skb, offset + priv->offset,
-                                         dest, priv->len) < 0)
+                       if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
+                                               dest, priv->len) < 0)
                                break;
                        return;
                }
index c6435e7..f250b53 100644 (file)
@@ -312,6 +312,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
        struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
        struct rb_node *node, *next, *parent, **p, *first = NULL;
        struct nft_rbtree *priv = nft_set_priv(set);
+       u8 cur_genmask = nft_genmask_cur(net);
        u8 genmask = nft_genmask_next(net);
        int d, err;
 
@@ -357,8 +358,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
                if (!nft_set_elem_active(&rbe->ext, genmask))
                        continue;
 
-               /* perform garbage collection to avoid bogus overlap reports. */
-               if (nft_set_elem_expired(&rbe->ext)) {
+               /* perform garbage collection to avoid bogus overlap reports
+                * but skip new elements in this transaction.
+                */
+               if (nft_set_elem_expired(&rbe->ext) &&
+                   nft_set_elem_active(&rbe->ext, cur_genmask)) {
                        err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
                        if (err < 0)
                                return err;
index e896109..b46a6a5 100644 (file)
@@ -149,6 +149,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
 {
        const struct xt_sctp_info *info = par->matchinfo;
 
+       if (info->flag_count > ARRAY_SIZE(info->flag_info))
+               return -EINVAL;
        if (info->flags & ~XT_SCTP_VALID_FLAGS)
                return -EINVAL;
        if (info->invflags & ~XT_SCTP_VALID_FLAGS)
index 177b40d..117d461 100644 (file)
@@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
        return ret ^ data->invert;
 }
 
+static int u32_mt_checkentry(const struct xt_mtchk_param *par)
+{
+       const struct xt_u32 *data = par->matchinfo;
+       const struct xt_u32_test *ct;
+       unsigned int i;
+
+       if (data->ntests > ARRAY_SIZE(data->tests))
+               return -EINVAL;
+
+       for (i = 0; i < data->ntests; ++i) {
+               ct = &data->tests[i];
+
+               if (ct->nnums > ARRAY_SIZE(ct->location) ||
+                   ct->nvalues > ARRAY_SIZE(ct->value))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 static struct xt_match xt_u32_mt_reg __read_mostly = {
        .name       = "u32",
        .revision   = 0,
        .family     = NFPROTO_UNSPEC,
        .match      = u32_mt,
+       .checkentry = u32_mt_checkentry,
        .matchsize  = sizeof(struct xt_u32),
        .me         = THIS_MODULE,
 };
index 591d87d..68e6acd 100644 (file)
@@ -61,6 +61,7 @@ struct fq_pie_sched_data {
        struct pie_params p_params;
        u32 ecn_prob;
        u32 flows_cnt;
+       u32 flows_cursor;
        u32 quantum;
        u32 memory_limit;
        u32 new_flow_count;
@@ -375,22 +376,32 @@ flow_error:
 static void fq_pie_timer(struct timer_list *t)
 {
        struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
+       unsigned long next, tupdate;
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock; /* to lock qdisc for probability calculations */
-       u32 idx;
+       int max_cnt, i;
 
        rcu_read_lock();
        root_lock = qdisc_lock(qdisc_root_sleeping(sch));
        spin_lock(root_lock);
 
-       for (idx = 0; idx < q->flows_cnt; idx++)
-               pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
-                                         q->flows[idx].backlog);
-
-       /* reset the timer to fire after 'tupdate' jiffies. */
-       if (q->p_params.tupdate)
-               mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
+       /* Limit this expensive loop to 2048 flows per round. */
+       max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
+       for (i = 0; i < max_cnt; i++) {
+               pie_calculate_probability(&q->p_params,
+                                         &q->flows[q->flows_cursor].vars,
+                                         q->flows[q->flows_cursor].backlog);
+               q->flows_cursor++;
+       }
 
+       tupdate = q->p_params.tupdate;
+       next = 0;
+       if (q->flows_cursor >= q->flows_cnt) {
+               q->flows_cursor = 0;
+               next = tupdate;
+       }
+       if (tupdate)
+               mod_timer(&q->adapt_timer, jiffies + next);
        spin_unlock(root_lock);
        rcu_read_unlock();
 }
index ea8c4a7..35f49ed 100644 (file)
@@ -207,7 +207,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
        .priv_size   =       sizeof(struct plug_sched_data),
        .enqueue     =       plug_enqueue,
        .dequeue     =       plug_dequeue,
-       .peek        =       qdisc_peek_head,
+       .peek        =       qdisc_peek_dequeued,
        .init        =       plug_init,
        .change      =       plug_change,
        .reset       =       qdisc_reset_queue,
index 1a25752..546c10a 100644 (file)
@@ -974,10 +974,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
 }
 
 /* Dequeue head packet of the head class in the DRR queue of the aggregate. */
-static void agg_dequeue(struct qfq_aggregate *agg,
-                       struct qfq_class *cl, unsigned int len)
+static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
+                                  struct qfq_class *cl, unsigned int len)
 {
-       qdisc_dequeue_peeked(cl->qdisc);
+       struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
+
+       if (!skb)
+               return NULL;
 
        cl->deficit -= (int) len;
 
@@ -987,6 +990,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
                cl->deficit += agg->lmax;
                list_move_tail(&cl->alist, &agg->active);
        }
+
+       return skb;
 }
 
 static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
@@ -1132,11 +1137,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
        if (!skb)
                return NULL;
 
-       qdisc_qstats_backlog_dec(sch, skb);
        sch->q.qlen--;
+
+       skb = agg_dequeue(in_serv_agg, cl, len);
+
+       if (!skb) {
+               sch->q.qlen++;
+               return NULL;
+       }
+
+       qdisc_qstats_backlog_dec(sch, skb);
        qdisc_bstats_update(sch, skb);
 
-       agg_dequeue(in_serv_agg, cl, len);
        /* If lmax is lowered, through qfq_change_class, for a class
         * owning pending packets with larger size than the new value
         * of lmax, then the following condition may hold.
index f13d6a3..ec00ee7 100644 (file)
@@ -282,7 +282,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
                assoc->init_retries, assoc->shutdown_retries,
                assoc->rtx_data_chunks,
                refcount_read(&sk->sk_wmem_alloc),
-               sk->sk_wmem_queued,
+               READ_ONCE(sk->sk_wmem_queued),
                sk->sk_sndbuf,
                sk->sk_rcvbuf);
        seq_printf(seq, "\n");
index fd0631e..ab943e8 100644 (file)
@@ -69,7 +69,7 @@
 #include <net/sctp/stream_sched.h>
 
 /* Forward declarations for internal helper functions. */
-static bool sctp_writeable(struct sock *sk);
+static bool sctp_writeable(const struct sock *sk);
 static void sctp_wfree(struct sk_buff *skb);
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
                                size_t msg_len);
@@ -140,7 +140,7 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
 
        refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
        asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
-       sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
+       sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk));
        sk_mem_charge(sk, chunk->skb->truesize);
 }
 
@@ -9144,7 +9144,7 @@ static void sctp_wfree(struct sk_buff *skb)
        struct sock *sk = asoc->base.sk;
 
        sk_mem_uncharge(sk, skb->truesize);
-       sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
+       sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk)));
        asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
        WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
                                      &sk->sk_wmem_alloc));
@@ -9299,9 +9299,9 @@ void sctp_write_space(struct sock *sk)
  * UDP-style sockets or TCP-style sockets, this code should work.
  *  - Daisy
  */
-static bool sctp_writeable(struct sock *sk)
+static bool sctp_writeable(const struct sock *sk)
 {
-       return sk->sk_sndbuf > sk->sk_wmem_queued;
+       return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued);
 }
 
 /* Wait for an association to go into ESTABLISHED state. If timeout is 0,
index 77f2832..c8b08b3 100644 (file)
@@ -827,7 +827,7 @@ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
 
 static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
 {
-       bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+       bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
        struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
        struct net_device *orig_dev;
        ktime_t hwtstamp;
@@ -879,12 +879,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
        int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
        int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
        struct scm_timestamping_internal tss;
-
        int empty = 1, false_tstamp = 0;
        struct skb_shared_hwtstamps *shhwtstamps =
                skb_hwtstamps(skb);
        int if_index;
        ktime_t hwtstamp;
+       u32 tsflags;
 
        /* Race occurred between timestamp enabling and packet
           receiving.  Fill in the current time for now. */
@@ -926,11 +926,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
        }
 
        memset(&tss, 0, sizeof(tss));
-       if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+       tsflags = READ_ONCE(sk->sk_tsflags);
+       if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
            ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
                empty = 0;
        if (shhwtstamps &&
-           (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+           (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
            !skb_is_swtx_tstamp(skb, false_tstamp)) {
                if_index = 0;
                if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
@@ -938,14 +939,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
                else
                        hwtstamp = shhwtstamps->hwtstamp;
 
-               if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+               if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
                        hwtstamp = ptp_convert_timestamp(&hwtstamp,
-                                                        sk->sk_bind_phc);
+                                                        READ_ONCE(sk->sk_bind_phc));
 
                if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
                        empty = 0;
 
-                       if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+                       if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
                            !skb_is_err_queue(skb))
                                put_ts_pktinfo(msg, skb, if_index);
                }
index 86930a8..3e8a04a 100644 (file)
@@ -680,7 +680,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
         *        What the above comment does talk about? --ANK(980817)
         */
 
-       if (unix_tot_inflight)
+       if (READ_ONCE(unix_tot_inflight))
                unix_gc();              /* Garbage collect fds */
 }
 
index e9dde71..6ff628f 100644 (file)
@@ -64,7 +64,7 @@ void unix_inflight(struct user_struct *user, struct file *fp)
                /* Paired with READ_ONCE() in wait_for_unix_gc() */
                WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
        }
-       user->unix_inflight++;
+       WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
        spin_unlock(&unix_gc_lock);
 }
 
@@ -85,7 +85,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
                /* Paired with READ_ONCE() in wait_for_unix_gc() */
                WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
        }
-       user->unix_inflight--;
+       WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
        spin_unlock(&unix_gc_lock);
 }
 
@@ -99,7 +99,7 @@ static inline bool too_many_unix_fds(struct task_struct *p)
 {
        struct user_struct *user = current_user();
 
-       if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+       if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
                return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
        return false;
 }
index fcfc847..55f8b9b 100644 (file)
@@ -602,7 +602,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 
        for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
                if (unlikely(i >= MAX_SKB_FRAGS))
-                       return ERR_PTR(-EFAULT);
+                       return ERR_PTR(-EOVERFLOW);
 
                page = pool->umem->pgs[addr >> PAGE_SHIFT];
                get_page(page);
@@ -655,15 +655,17 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
                        skb_put(skb, len);
 
                        err = skb_store_bits(skb, 0, buffer, len);
-                       if (unlikely(err))
+                       if (unlikely(err)) {
+                               kfree_skb(skb);
                                goto free_err;
+                       }
                } else {
                        int nr_frags = skb_shinfo(skb)->nr_frags;
                        struct page *page;
                        u8 *vaddr;
 
                        if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
-                               err = -EFAULT;
+                               err = -EOVERFLOW;
                                goto free_err;
                        }
 
@@ -690,12 +692,14 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
        return skb;
 
 free_err:
-       if (err == -EAGAIN) {
-               xsk_cq_cancel_locked(xs, 1);
-       } else {
-               xsk_set_destructor_arg(skb);
-               xsk_drop_skb(skb);
+       if (err == -EOVERFLOW) {
+               /* Drop the packet */
+               xsk_set_destructor_arg(xs->skb);
+               xsk_drop_skb(xs->skb);
                xskq_cons_release(xs->tx);
+       } else {
+               /* Let application retry */
+               xsk_cq_cancel_locked(xs, 1);
        }
 
        return ERR_PTR(err);
@@ -738,7 +742,7 @@ static int __xsk_generic_xmit(struct sock *sk)
                skb = xsk_build_skb(xs, &desc);
                if (IS_ERR(skb)) {
                        err = PTR_ERR(skb);
-                       if (err == -EAGAIN)
+                       if (err != -EOVERFLOW)
                                goto out;
                        err = 0;
                        continue;
index c014217..22b36c8 100644 (file)
@@ -111,6 +111,9 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
        sock_diag_save_cookie(sk, msg->xdiag_cookie);
 
        mutex_lock(&xs->mutex);
+       if (READ_ONCE(xs->state) == XSK_UNBOUND)
+               goto out_nlmsg_trim;
+
        if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
                goto out_nlmsg_trim;
 
index 40cd13e..2fe6f28 100644 (file)
@@ -6,7 +6,79 @@
 # They are independent, and can be combined like W=12 or W=123e.
 # ==========================================================================
 
-KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
+# Default set of warnings, always enabled
+KBUILD_CFLAGS += -Wall
+KBUILD_CFLAGS += -Wundef
+KBUILD_CFLAGS += -Werror=implicit-function-declaration
+KBUILD_CFLAGS += -Werror=implicit-int
+KBUILD_CFLAGS += -Werror=return-type
+KBUILD_CFLAGS += -Werror=strict-prototypes
+KBUILD_CFLAGS += -Wno-format-security
+KBUILD_CFLAGS += -Wno-trigraphs
+KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+
+ifneq ($(CONFIG_FRAME_WARN),0)
+KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN)
+endif
+
+KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
+KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
+KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
+
+ifdef CONFIG_CC_IS_CLANG
+# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.
+KBUILD_CFLAGS += -Wno-gnu
+else
+
+# gcc inanely warns about local variables called 'main'
+KBUILD_CFLAGS += -Wno-main
+endif
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+
+# These result in bogus false positives
+KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
+
+# Variable Length Arrays (VLAs) should not be used anywhere in the kernel
+KBUILD_CFLAGS += -Wvla
+
+# disable pointer signed / unsigned warnings in gcc 4.0
+KBUILD_CFLAGS += -Wno-pointer-sign
+
+# In order to make sure new function cast mismatches are not introduced
+# in the kernel (to avoid tripping CFI checking), the kernel should be
+# globally built with -Wcast-function-type.
+KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type)
+
+# The allocators already balk at large sizes, so silence the compiler
+# warnings for bounds checks involving those possible values. While
+# -Wno-alloc-size-larger-than would normally be used here, earlier versions
+# of gcc (<9.1) weirdly don't handle the option correctly when _other_
+# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX
+# doesn't work (as it is documented to), silently resolving to "0" prior to
+# version 9.1 (and producing an error more recently). Numeric values larger
+# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently
+# ignored, continuing to default to PTRDIFF_MAX. So, left with no other
+# choice, we must perform a versioned check to disable this warning.
+# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au
+KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
+
+# Prohibit date/time macros, which would make the build non-deterministic
+KBUILD_CFLAGS += -Werror=date-time
+
+# enforce correct pointer usage
+KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types)
+
+# Require designated initializers for all marked structures
+KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
+
+# Warn if there is an enum types mismatch
+KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion)
 
 # backward compatibility
 KBUILD_EXTRA_WARN ?= $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)
@@ -24,6 +96,7 @@ ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),)
 
 KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
 KBUILD_CFLAGS += -Wmissing-declarations
+KBUILD_CFLAGS += $(call cc-option, -Wrestrict)
 KBUILD_CFLAGS += -Wmissing-format-attribute
 KBUILD_CFLAGS += -Wmissing-prototypes
 KBUILD_CFLAGS += -Wold-style-definition
@@ -31,12 +104,10 @@ KBUILD_CFLAGS += -Wmissing-include-dirs
 KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
 KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
 KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
+KBUILD_CFLAGS += $(call cc-option, -Wformat-overflow)
+KBUILD_CFLAGS += $(call cc-option, -Wformat-truncation)
+KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow)
 KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
-# The following turn off the warnings enabled by -Wextra
-KBUILD_CFLAGS += -Wno-missing-field-initializers
-KBUILD_CFLAGS += -Wno-sign-compare
-KBUILD_CFLAGS += -Wno-type-limits
-KBUILD_CFLAGS += -Wno-shift-negative-value
 
 KBUILD_CPPFLAGS += -Wundef
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
@@ -45,9 +116,16 @@ else
 
 # Some diagnostics enabled by default are noisy.
 # Suppress them by using -Wno... except for W=1.
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
+KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
 
 ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Wno-initializer-overrides
 # Clang before clang-16 would warn on default argument promotions.
 ifneq ($(call clang-min-version, 160000),y)
 # Disable -Wformat
@@ -61,7 +139,6 @@ ifeq ($(call clang-min-version, 120000),y)
 KBUILD_CFLAGS += -Wformat-insufficient-args
 endif
 endif
-KBUILD_CFLAGS += -Wno-sign-compare
 KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
 KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
 KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
@@ -83,8 +160,25 @@ KBUILD_CFLAGS += -Wtype-limits
 KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
 KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
 
+ifdef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += -Winitializer-overrides
+endif
+
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
 
+else
+
+# The following turn off the warnings enabled by -Wextra
+KBUILD_CFLAGS += -Wno-missing-field-initializers
+KBUILD_CFLAGS += -Wno-type-limits
+KBUILD_CFLAGS += -Wno-shift-negative-value
+
+ifdef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += -Wno-initializer-overrides
+else
+KBUILD_CFLAGS += -Wno-maybe-uninitialized
+endif
+
 endif
 
 #
@@ -106,6 +200,11 @@ KBUILD_CFLAGS += $(call cc-option, -Wpacked-bitfield-compat)
 
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN3
 
+else
+
+# The following turn off the warnings enabled by -Wextra
+KBUILD_CFLAGS += -Wno-sign-compare
+
 endif
 
 #
index ab0c5bd..c59cc57 100644 (file)
@@ -9,6 +9,35 @@ __modinst:
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
 
+install-y :=
+
+ifeq ($(KBUILD_EXTMOD)$(sign-only),)
+
+# remove the old directory and symlink
+$(shell rm -fr $(MODLIB)/kernel $(MODLIB)/build)
+
+install-$(CONFIG_MODULES) += $(addprefix $(MODLIB)/, build modules.order)
+
+$(MODLIB)/build: FORCE
+       $(call cmd,symlink)
+
+quiet_cmd_symlink = SYMLINK $@
+      cmd_symlink = ln -s $(CURDIR) $@
+
+$(MODLIB)/modules.order: modules.order FORCE
+       $(call cmd,install_modorder)
+
+quiet_cmd_install_modorder = INSTALL $@
+      cmd_install_modorder = sed 's:^\(.*\)\.o$$:kernel/\1.ko:' $< > $@
+
+# Install modules.builtin(.modinfo) even when CONFIG_MODULES is disabled.
+install-y += $(addprefix $(MODLIB)/, modules.builtin modules.builtin.modinfo)
+
+$(addprefix $(MODLIB)/, modules.builtin modules.builtin.modinfo): $(MODLIB)/%: % FORCE
+       $(call cmd,install)
+
+endif
+
 modules := $(call read-file, $(MODORDER))
 
 ifeq ($(KBUILD_EXTMOD),)
@@ -27,15 +56,16 @@ suffix-$(CONFIG_MODULE_COMPRESS_XZ) := .xz
 suffix-$(CONFIG_MODULE_COMPRESS_ZSTD)  := .zst
 
 modules := $(patsubst $(extmod_prefix)%.o, $(dst)/%.ko$(suffix-y), $(modules))
+install-$(CONFIG_MODULES) += $(modules)
 
-__modinst: $(modules)
+__modinst: $(install-y)
        @:
 
 #
 # Installation
 #
 quiet_cmd_install = INSTALL $@
-      cmd_install = mkdir -p $(dir $@); cp $< $@
+      cmd_install = cp $< $@
 
 # Strip
 #
@@ -65,7 +95,6 @@ endif
 # Signing
 # Don't stop modules_install even if we can't sign external modules.
 #
-ifeq ($(CONFIG_MODULE_SIG_ALL),y)
 ifeq ($(filter pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),)
 sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(srctree)/)$(CONFIG_MODULE_SIG_KEY)
 else
@@ -74,18 +103,34 @@ endif
 quiet_cmd_sign = SIGN    $@
       cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" certs/signing_key.x509 $@ \
                  $(if $(KBUILD_EXTMOD),|| true)
-else
+
+ifeq ($(sign-only),)
+
+# During modules_install, modules are signed only when CONFIG_MODULE_SIG_ALL=y.
+ifndef CONFIG_MODULE_SIG_ALL
 quiet_cmd_sign :=
       cmd_sign := :
 endif
 
-ifeq ($(modules_sign_only),)
+# Create necessary directories
+$(shell mkdir -p $(sort $(dir $(install-y))))
 
 $(dst)/%.ko: $(extmod_prefix)%.ko FORCE
        $(call cmd,install)
        $(call cmd,strip)
        $(call cmd,sign)
 
+ifdef CONFIG_MODULES
+__modinst: depmod
+
+PHONY += depmod
+depmod: $(install-y)
+       $(call cmd,depmod)
+
+quiet_cmd_depmod = DEPMOD  $(MODLIB)
+      cmd_depmod = $(srctree)/scripts/depmod.sh $(KERNELRELEASE)
+endif
+
 else
 
 $(dst)/%.ko: FORCE
index 39472e8..739402f 100644 (file)
@@ -41,6 +41,7 @@ include $(srctree)/scripts/Kbuild.include
 MODPOST = scripts/mod/modpost
 
 modpost-args =                                                                         \
+       $(if $(CONFIG_MODULES),-M)                                                      \
        $(if $(CONFIG_MODVERSIONS),-m)                                                  \
        $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a)                                        \
        $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E)                                  \
index 92dbc88..2bcab02 100644 (file)
@@ -11,7 +11,6 @@ TAR_CONTENT := Documentation LICENSES arch block certs crypto drivers fs \
                samples scripts security sound tools usr virt \
                .config Makefile \
                Kbuild Kconfig COPYING $(wildcard localversion*)
-MKSPEC     := $(srctree)/scripts/package/mkspec
 
 quiet_cmd_src_tar = TAR     $(2).tar.gz
       cmd_src_tar = \
@@ -66,30 +65,38 @@ $(linux-tarballs): archive-args = --prefix=linux/ $$(cat $<)
 $(linux-tarballs): .tmp_HEAD FORCE
        $(call if_changed,archive)
 
-# rpm-pkg
+# rpm-pkg srcrpm-pkg binrpm-pkg
 # ---------------------------------------------------------------------------
-PHONY += rpm-pkg
-rpm-pkg: srpm = $(shell rpmspec --srpm --query --queryformat='%{name}-%{VERSION}-%{RELEASE}.src.rpm' kernel.spec)
-rpm-pkg: srcrpm-pkg
-       +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -rb $(srpm) \
-       --define='_smp_mflags %{nil}'
 
-# srcrpm-pkg
-# ---------------------------------------------------------------------------
-PHONY += srcrpm-pkg
-srcrpm-pkg: linux.tar.gz
-       $(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec
-       +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -bs kernel.spec \
-       --define='_smp_mflags %{nil}' --define='_sourcedir rpmbuild/SOURCES' --define='_srcrpmdir .'
+quiet_cmd_mkspec = GEN     $@
+      cmd_mkspec = $(srctree)/scripts/package/mkspec > $@
 
-# binrpm-pkg
-# ---------------------------------------------------------------------------
-PHONY += binrpm-pkg
-binrpm-pkg:
-       $(MAKE) -f $(srctree)/Makefile
-       $(CONFIG_SHELL) $(MKSPEC) prebuilt > $(objtree)/binkernel.spec
-       +rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \
-               $(UTS_MACHINE)-linux -bb $(objtree)/binkernel.spec
+kernel.spec: FORCE
+       $(call cmd,mkspec)
+
+PHONY += rpm-sources
+rpm-sources: linux.tar.gz
+       $(Q)mkdir -p rpmbuild/SOURCES
+       $(Q)ln -f linux.tar.gz rpmbuild/SOURCES/linux.tar.gz
+       $(Q)cp $(KCONFIG_CONFIG) rpmbuild/SOURCES/config
+       $(Q)$(srctree)/scripts/package/gen-diff-patch rpmbuild/SOURCES/diff.patch
+
+PHONY += rpm-pkg srcrpm-pkg binrpm-pkg
+
+rpm-pkg:    private build-type := a
+srcrpm-pkg: private build-type := s
+binrpm-pkg: private build-type := b
+
+rpm-pkg srcrpm-pkg: rpm-sources
+rpm-pkg srcrpm-pkg binrpm-pkg: kernel.spec
+       +$(strip rpmbuild -b$(build-type) kernel.spec \
+       --define='_topdir $(abspath rpmbuild)' \
+       $(if $(filter a b, $(build-type)), \
+               --target $(UTS_MACHINE)-linux --build-in-place --noprep --define='_smp_mflags %{nil}' \
+               $$(rpm -q rpm >/dev/null 2>&1 || echo --nodeps)) \
+       $(if $(filter b, $(build-type)), \
+               --without devel) \
+       $(RPMOPTS))
 
 # deb-pkg srcdeb-pkg bindeb-pkg
 # ---------------------------------------------------------------------------
@@ -141,14 +148,10 @@ deb-pkg srcdeb-pkg bindeb-pkg:
        $(if $(findstring source, $(build-type)), \
                --unsigned-source --compression=$(KDEB_SOURCE_COMPRESS)) \
        $(if $(findstring binary, $(build-type)), \
-               -r$(KBUILD_PKG_ROOTCMD) -a$$(cat debian/arch), \
+               --rules-file='$(MAKE) -f debian/rules' --jobs=1 -r$(KBUILD_PKG_ROOTCMD) -a$$(cat debian/arch), \
                --no-check-builddeps) \
        $(DPKG_FLAGS))
 
-PHONY += intdeb-pkg
-intdeb-pkg:
-       +$(CONFIG_SHELL) $(srctree)/scripts/package/builddeb
-
 # snap-pkg
 # ---------------------------------------------------------------------------
 PHONY += snap-pkg
index eaae2ce..61b7ddd 100755 (executable)
@@ -59,9 +59,9 @@ class Helper(APIElement):
         Break down helper function protocol into smaller chunks: return type,
         name, distincts arguments.
         """
-        arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$')
+        arg_re = re.compile(r'((\w+ )*?(\w+|...))( (\**)(\w+))?$')
         res = {}
-        proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
+        proto_re = re.compile(r'(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
 
         capture = proto_re.match(self.proto)
         res['ret_type'] = capture.group(1)
@@ -114,11 +114,11 @@ class HeaderParser(object):
         return Helper(proto=proto, desc=desc, ret=ret)
 
     def parse_symbol(self):
-        p = re.compile(' \* ?(BPF\w+)$')
+        p = re.compile(r' \* ?(BPF\w+)$')
         capture = p.match(self.line)
         if not capture:
             raise NoSyscallCommandFound
-        end_re = re.compile(' \* ?NOTES$')
+        end_re = re.compile(r' \* ?NOTES$')
         end = end_re.match(self.line)
         if end:
             raise NoSyscallCommandFound
@@ -133,7 +133,7 @@ class HeaderParser(object):
         #   - Same as above, with "const" and/or "struct" in front of type
         #   - "..." (undefined number of arguments, for bpf_trace_printk())
         # There is at least one term ("void"), and at most five arguments.
-        p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
+        p = re.compile(r' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
         capture = p.match(self.line)
         if not capture:
             raise NoHelperFound
@@ -141,7 +141,7 @@ class HeaderParser(object):
         return capture.group(1)
 
     def parse_desc(self, proto):
-        p = re.compile(' \* ?(?:\t| {5,8})Description$')
+        p = re.compile(r' \* ?(?:\t| {5,8})Description$')
         capture = p.match(self.line)
         if not capture:
             raise Exception("No description section found for " + proto)
@@ -154,7 +154,7 @@ class HeaderParser(object):
             if self.line == ' *\n':
                 desc += '\n'
             else:
-                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
+                p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
                     desc_present = True
@@ -167,7 +167,7 @@ class HeaderParser(object):
         return desc
 
     def parse_ret(self, proto):
-        p = re.compile(' \* ?(?:\t| {5,8})Return$')
+        p = re.compile(r' \* ?(?:\t| {5,8})Return$')
         capture = p.match(self.line)
         if not capture:
             raise Exception("No return section found for " + proto)
@@ -180,7 +180,7 @@ class HeaderParser(object):
             if self.line == ' *\n':
                 ret += '\n'
             else:
-                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
+                p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                 capture = p.match(self.line)
                 if capture:
                     ret_present = True
@@ -219,12 +219,12 @@ class HeaderParser(object):
         self.seek_to('enum bpf_cmd {',
                      'Could not find start of bpf_cmd enum', 0)
         # Searches for either one or more BPF\w+ enums
-        bpf_p = re.compile('\s*(BPF\w+)+')
+        bpf_p = re.compile(r'\s*(BPF\w+)+')
         # Searches for an enum entry assigned to another entry,
         # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is
         # not documented hence should be skipped in check to
         # determine if the right number of syscalls are documented
-        assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+        assign_p = re.compile(r'\s*(BPF\w+)\s*=\s*(BPF\w+)')
         bpf_cmd_str = ''
         while True:
             capture = assign_p.match(self.line)
@@ -239,7 +239,7 @@ class HeaderParser(object):
                 break
             self.line = self.reader.readline()
         # Find the number of occurences of BPF\w+
-        self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+        self.enum_syscalls = re.findall(r'(BPF\w+)+', bpf_cmd_str)
 
     def parse_desc_helpers(self):
         self.seek_to(helpersDocStart,
@@ -263,7 +263,7 @@ class HeaderParser(object):
         self.seek_to('#define ___BPF_FUNC_MAPPER(FN, ctx...)',
                      'Could not find start of eBPF helper definition list')
         # Searches for one FN(\w+) define or a backslash for newline
-        p = re.compile('\s*FN\((\w+), (\d+), ##ctx\)|\\\\')
+        p = re.compile(r'\s*FN\((\w+), (\d+), ##ctx\)|\\\\')
         fn_defines_str = ''
         i = 0
         while True:
@@ -278,7 +278,7 @@ class HeaderParser(object):
                 break
             self.line = self.reader.readline()
         # Find the number of occurences of FN(\w+)
-        self.define_unique_helpers = re.findall('FN\(\w+, \d+, ##ctx\)', fn_defines_str)
+        self.define_unique_helpers = re.findall(r'FN\(\w+, \d+, ##ctx\)', fn_defines_str)
 
     def validate_helpers(self):
         last_helper = ''
@@ -425,7 +425,7 @@ class PrinterRST(Printer):
         try:
             cmd = ['git', 'log', '-1', '--pretty=format:%cs', '--no-patch',
                    '-L',
-                   '/{}/,/\*\//:include/uapi/linux/bpf.h'.format(delimiter)]
+                   '/{}/,/\\*\\//:include/uapi/linux/bpf.h'.format(delimiter)]
             date = subprocess.run(cmd, cwd=linuxRoot,
                                   capture_output=True, check=True)
             return date.stdout.decode().rstrip()
@@ -516,7 +516,7 @@ as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
 programs that are compatible with the GNU Privacy License (GPL).
 
 In order to use such helpers, the eBPF program must be loaded with the correct
-license string passed (via **attr**) to the **bpf**\ () system call, and this
+license string passed (via **attr**) to the **bpf**\\ () system call, and this
 generally translates into the C source code of the program containing a line
 similar to the following:
 
@@ -550,7 +550,7 @@ may be interested in:
 * The bpftool utility can be used to probe the availability of helper functions
   on the system (as well as supported program and map types, and a number of
   other parameters). To do so, run **bpftool feature probe** (see
-  **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to
+  **bpftool-feature**\\ (8) for details). Add the **unprivileged** keyword to
   list features available to unprivileged users.
 
 Compatibility between helper functions and program types can generally be found
@@ -562,23 +562,23 @@ other functions, themselves allowing access to additional helpers. The
 requirement for GPL license is also in those **struct bpf_func_proto**.
 
 Compatibility between helper functions and map types can be found in the
-**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*.
+**check_map_func_compatibility**\\ () function in file *kernel/bpf/verifier.c*.
 
 Helper functions that invalidate the checks on **data** and **data_end**
 pointers for network processing are listed in function
-**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*.
+**bpf_helper_changes_pkt_data**\\ () in file *net/core/filter.c*.
 
 SEE ALSO
 ========
 
-**bpf**\ (2),
-**bpftool**\ (8),
-**cgroups**\ (7),
-**ip**\ (8),
-**perf_event_open**\ (2),
-**sendmsg**\ (2),
-**socket**\ (7),
-**tc-bpf**\ (8)'''
+**bpf**\\ (2),
+**bpftool**\\ (8),
+**cgroups**\\ (7),
+**ip**\\ (8),
+**perf_event_open**\\ (2),
+**sendmsg**\\ (2),
+**socket**\\ (7),
+**tc-bpf**\\ (8)'''
         print(footer)
 
     def print_proto(self, helper):
@@ -598,7 +598,7 @@ SEE ALSO
             one_arg = '{}{}'.format(comma, a['type'])
             if a['name']:
                 if a['star']:
-                    one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
+                    one_arg += ' {}**\\ '.format(a['star'].replace('*', '\\*'))
                 else:
                     one_arg += '** '
                 one_arg += '*{}*\\ **'.format(a['name'])
index 3643b4f..e22da27 100755 (executable)
@@ -1,14 +1,16 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 #
-# A depmod wrapper used by the toplevel Makefile
+# A depmod wrapper
 
-if test $# -ne 2; then
-       echo "Usage: $0 /sbin/depmod <kernelrelease>" >&2
+if test $# -ne 1; then
+       echo "Usage: $0 <kernelrelease>" >&2
        exit 1
 fi
-DEPMOD=$1
-KERNELRELEASE=$2
+
+KERNELRELEASE=$1
+
+: ${DEPMOD:=depmod}
 
 if ! test -r System.map ; then
        echo "Warning: modules_install: missing 'System.map' file. Skipping depmod." >&2
@@ -23,33 +25,8 @@ if [ -z $(command -v $DEPMOD) ]; then
        exit 0
 fi
 
-# older versions of depmod require the version string to start with three
-# numbers, so we cheat with a symlink here
-depmod_hack_needed=true
-tmp_dir=$(mktemp -d ${TMPDIR:-/tmp}/depmod.XXXXXX)
-mkdir -p "$tmp_dir/lib/modules/$KERNELRELEASE"
-if "$DEPMOD" -b "$tmp_dir" $KERNELRELEASE 2>/dev/null; then
-       if test -e "$tmp_dir/lib/modules/$KERNELRELEASE/modules.dep" -o \
-               -e "$tmp_dir/lib/modules/$KERNELRELEASE/modules.dep.bin"; then
-               depmod_hack_needed=false
-       fi
-fi
-rm -rf "$tmp_dir"
-if $depmod_hack_needed; then
-       symlink="$INSTALL_MOD_PATH/lib/modules/99.98.$KERNELRELEASE"
-       ln -s "$KERNELRELEASE" "$symlink"
-       KERNELRELEASE=99.98.$KERNELRELEASE
-fi
-
 set -- -ae -F System.map
 if test -n "$INSTALL_MOD_PATH"; then
        set -- "$@" -b "$INSTALL_MOD_PATH"
 fi
-"$DEPMOD" "$@" "$KERNELRELEASE"
-ret=$?
-
-if $depmod_hack_needed; then
-       rm -f "$symlink"
-fi
-
-exit $ret
+exec "$DEPMOD" "$@" "$KERNELRELEASE"
index 1db1889..07f6dc4 100755 (executable)
@@ -85,8 +85,7 @@ if arg_contain -S "$@"; then
        fi
 
        # For arch/powerpc/tools/gcc-check-mprofile-kernel.sh
-       if arg_contain -m64 "$@" && arg_contain -mlittle-endian "$@" &&
-               arg_contain -mprofile-kernel "$@"; then
+       if arg_contain -m64 "$@" && arg_contain -mprofile-kernel "$@"; then
                if ! test -t 0 && ! grep -q notrace; then
                        echo "_mcount"
                fi
index af1c961..4eee155 100644 (file)
@@ -93,11 +93,13 @@ endif
 %_defconfig: $(obj)/conf
        $(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$@ $(Kconfig)
 
-configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/configs/$@)
+configfiles = $(wildcard $(srctree)/kernel/configs/$(1) $(srctree)/arch/$(SRCARCH)/configs/$(1))
+all-config-fragments = $(call configfiles,*.config)
+config-fragments = $(call configfiles,$@)
 
 %.config: $(obj)/conf
-       $(if $(call configfiles),, $(error No configuration exists for this target on this architecture))
-       $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles)
+       $(if $(config-fragments),, $(error $@ fragment does not exists on this architecture))
+       $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(config-fragments)
        $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
 
 PHONY += tinyconfig
@@ -115,6 +117,7 @@ clean-files += tests/.cache
 
 # Help text used by make help
 help:
+       @echo  'Configuration targets:'
        @echo  '  config          - Update current config utilising a line-oriented program'
        @echo  '  nconfig         - Update current config utilising a ncurses menu based program'
        @echo  '  menuconfig      - Update current config utilising a menu based program'
@@ -141,6 +144,12 @@ help:
        @echo  '                    default value without prompting'
        @echo  '  tinyconfig      - Configure the tiniest possible kernel'
        @echo  '  testconfig      - Run Kconfig unit tests (requires python3 and pytest)'
+       @echo  ''
+       @echo  'Configuration topic targets:'
+       @$(foreach f, $(all-config-fragments), \
+               if help=$$(grep -m1 '^# Help: ' $(f)); then \
+                       printf '  %-25s - %s\n' '$(notdir $(f))' "$${help#*: }"; \
+               fi;)
 
 # ===========================================================================
 # object files used by all kconfig flavours
index 992575f..4a6811d 100644 (file)
@@ -349,7 +349,11 @@ int conf_read_simple(const char *name, int def)
        char *p, *p2;
        struct symbol *sym;
        int i, def_flags;
+       const char *warn_unknown;
+       const char *werror;
 
+       warn_unknown = getenv("KCONFIG_WARN_UNKNOWN_SYMBOLS");
+       werror = getenv("KCONFIG_WERROR");
        if (name) {
                in = zconf_fopen(name);
        } else {
@@ -437,6 +441,10 @@ load:
                        if (def == S_DEF_USER) {
                                sym = sym_find(line + 2 + strlen(CONFIG_));
                                if (!sym) {
+                                       if (warn_unknown)
+                                               conf_warning("unknown symbol: %s",
+                                                            line + 2 + strlen(CONFIG_));
+
                                        conf_set_changed(true);
                                        continue;
                                }
@@ -471,7 +479,7 @@ load:
 
                        sym = sym_find(line + strlen(CONFIG_));
                        if (!sym) {
-                               if (def == S_DEF_AUTO)
+                               if (def == S_DEF_AUTO) {
                                        /*
                                         * Reading from include/config/auto.conf
                                         * If CONFIG_FOO previously existed in
@@ -479,8 +487,13 @@ load:
                                         * include/config/FOO must be touched.
                                         */
                                        conf_touch_dep(line + strlen(CONFIG_));
-                               else
+                               } else {
+                                       if (warn_unknown)
+                                               conf_warning("unknown symbol: %s",
+                                                            line + strlen(CONFIG_));
+
                                        conf_set_changed(true);
+                               }
                                continue;
                        }
 
@@ -519,6 +532,10 @@ load:
        }
        free(line);
        fclose(in);
+
+       if (conf_warnings && werror)
+               exit(1);
+
        return 0;
 }
 
index 9c9caca..4a9a23b 100644 (file)
@@ -275,7 +275,6 @@ struct jump_key {
        struct list_head entries;
        size_t offset;
        struct menu *target;
-       int index;
 };
 
 extern struct file *file_list;
index e7118d6..471a59a 100644 (file)
@@ -101,6 +101,7 @@ const char *menu_get_prompt(struct menu *menu);
 struct menu *menu_get_parent_menu(struct menu *menu);
 bool menu_has_help(struct menu *menu);
 const char *menu_get_help(struct menu *menu);
+int get_jump_key_char(void);
 struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head);
 void menu_get_ext_help(struct menu *menu, struct gstr *help);
 
index 347daf2..a501abf 100644 (file)
@@ -196,13 +196,9 @@ int first_alpha(const char *string, const char *exempt);
 int dialog_yesno(const char *title, const char *prompt, int height, int width);
 int dialog_msgbox(const char *title, const char *prompt, int height,
                  int width, int pause);
-
-
-typedef void (*update_text_fn)(char *buf, size_t start, size_t end, void
-                              *_data);
-int dialog_textbox(const char *title, char *tbuf, int initial_height,
-                  int initial_width, int *keys, int *_vscroll, int *_hscroll,
-                  update_text_fn update_text, void *data);
+int dialog_textbox(const char *title, const char *tbuf, int initial_height,
+                  int initial_width, int *_vscroll, int *_hscroll,
+                  int (*extra_key_cb)(int, size_t, size_t, void *), void *data);
 int dialog_menu(const char *title, const char *prompt,
                const void *selected, int *s_scroll);
 int dialog_checklist(const char *title, const char *prompt, int height,
index bc4d4fb..058ed0e 100644 (file)
@@ -10,8 +10,8 @@
 
 static int hscroll;
 static int begin_reached, end_reached, page_length;
-static char *buf;
-static char *page;
+static const char *buf, *page;
+static size_t start, end;
 
 /*
  * Go back 'n' lines in text. Called by dialog_textbox().
@@ -98,21 +98,10 @@ static void print_line(WINDOW *win, int row, int width)
 /*
  * Print a new page of text.
  */
-static void print_page(WINDOW *win, int height, int width, update_text_fn
-                      update_text, void *data)
+static void print_page(WINDOW *win, int height, int width)
 {
        int i, passed_end = 0;
 
-       if (update_text) {
-               char *end;
-
-               for (i = 0; i < height; i++)
-                       get_line();
-               end = page;
-               back_lines(height);
-               update_text(buf, page - buf, end - buf, data);
-       }
-
        page_length = 0;
        for (i = 0; i < height; i++) {
                print_line(win, i, width);
@@ -142,24 +131,26 @@ static void print_position(WINDOW *win)
  * refresh window content
  */
 static void refresh_text_box(WINDOW *dialog, WINDOW *box, int boxh, int boxw,
-                            int cur_y, int cur_x, update_text_fn update_text,
-                            void *data)
+                            int cur_y, int cur_x)
 {
-       print_page(box, boxh, boxw, update_text, data);
+       start = page - buf;
+
+       print_page(box, boxh, boxw);
        print_position(dialog);
        wmove(dialog, cur_y, cur_x);    /* Restore cursor position */
        wrefresh(dialog);
+
+       end = page - buf;
 }
 
 /*
  * Display text from a file in a dialog box.
  *
  * keys is a null-terminated array
- * update_text() may not add or remove any '\n' or '\0' in tbuf
  */
-int dialog_textbox(const char *title, char *tbuf, int initial_height,
-                  int initial_width, int *keys, int *_vscroll, int *_hscroll,
-                  update_text_fn update_text, void *data)
+int dialog_textbox(const char *title, const char *tbuf, int initial_height,
+                  int initial_width, int *_vscroll, int *_hscroll,
+                  int (*extra_key_cb)(int, size_t, size_t, void *), void *data)
 {
        int i, x, y, cur_x, cur_y, key = 0;
        int height, width, boxh, boxw;
@@ -239,8 +230,7 @@ do_resize:
 
        /* Print first page of text */
        attr_clear(box, boxh, boxw, dlg.dialog.atr);
-       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x, update_text,
-                        data);
+       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
 
        while (!done) {
                key = wgetch(dialog);
@@ -259,8 +249,7 @@ do_resize:
                                begin_reached = 1;
                                page = buf;
                                refresh_text_box(dialog, box, boxh, boxw,
-                                                cur_y, cur_x, update_text,
-                                                data);
+                                                cur_y, cur_x);
                        }
                        break;
                case 'G':       /* Last page */
@@ -270,8 +259,7 @@ do_resize:
                        /* point to last char in buf */
                        page = buf + strlen(buf);
                        back_lines(boxh);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case 'K':       /* Previous line */
                case 'k':
@@ -280,8 +268,7 @@ do_resize:
                                break;
 
                        back_lines(page_length + 1);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case 'B':       /* Previous page */
                case 'b':
@@ -290,8 +277,7 @@ do_resize:
                        if (begin_reached)
                                break;
                        back_lines(page_length + boxh);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case 'J':       /* Next line */
                case 'j':
@@ -300,8 +286,7 @@ do_resize:
                                break;
 
                        back_lines(page_length - 1);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case KEY_NPAGE: /* Next page */
                case ' ':
@@ -310,8 +295,7 @@ do_resize:
                                break;
 
                        begin_reached = 0;
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case '0':       /* Beginning of line */
                case 'H':       /* Scroll left */
@@ -326,8 +310,7 @@ do_resize:
                                hscroll--;
                        /* Reprint current page to scroll horizontally */
                        back_lines(page_length);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case 'L':       /* Scroll right */
                case 'l':
@@ -337,8 +320,7 @@ do_resize:
                        hscroll++;
                        /* Reprint current page to scroll horizontally */
                        back_lines(page_length);
-                       refresh_text_box(dialog, box, boxh, boxw, cur_y,
-                                        cur_x, update_text, data);
+                       refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
                        break;
                case KEY_ESC:
                        if (on_key_esc(dialog) == KEY_ESC)
@@ -351,11 +333,9 @@ do_resize:
                        on_key_resize();
                        goto do_resize;
                default:
-                       for (i = 0; keys[i]; i++) {
-                               if (key == keys[i]) {
-                                       done = true;
-                                       break;
-                               }
+                       if (extra_key_cb && extra_key_cb(key, start, end, data)) {
+                               done = true;
+                               break;
                        }
                }
        }
index 53d8834..eccc87a 100644 (file)
@@ -22,8 +22,6 @@
 #include "lkc.h"
 #include "lxdialog/dialog.h"
 
-#define JUMP_NB                        9
-
 static const char mconf_readme[] =
 "Overview\n"
 "--------\n"
@@ -288,6 +286,7 @@ static int single_menu_mode;
 static int show_all_options;
 static int save_and_exit;
 static int silent;
+static int jump_key_char;
 
 static void conf(struct menu *menu, struct menu *active_menu);
 
@@ -348,19 +347,19 @@ static void reset_subtitle(void)
        set_dialog_subtitles(subtitles);
 }
 
-static int show_textbox_ext(const char *title, char *text, int r, int c, int
-                           *keys, int *vscroll, int *hscroll, update_text_fn
-                           update_text, void *data)
+static int show_textbox_ext(const char *title, const char *text, int r, int c,
+                           int *vscroll, int *hscroll,
+                           int (*extra_key_cb)(int, size_t, size_t, void *),
+                           void *data)
 {
        dialog_clear();
-       return dialog_textbox(title, text, r, c, keys, vscroll, hscroll,
-                             update_text, data);
+       return dialog_textbox(title, text, r, c, vscroll, hscroll,
+                             extra_key_cb, data);
 }
 
 static void show_textbox(const char *title, const char *text, int r, int c)
 {
-       show_textbox_ext(title, (char *) text, r, c, (int []) {0}, NULL, NULL,
-                        NULL, NULL);
+       show_textbox_ext(title, text, r, c, NULL, NULL, NULL, NULL);
 }
 
 static void show_helptext(const char *title, const char *text)
@@ -381,35 +380,54 @@ static void show_help(struct menu *menu)
 
 struct search_data {
        struct list_head *head;
-       struct menu **targets;
-       int *keys;
+       struct menu *target;
 };
 
-static void update_text(char *buf, size_t start, size_t end, void *_data)
+static int next_jump_key(int key)
+{
+       if (key < '1' || key > '9')
+               return '1';
+
+       key++;
+
+       if (key > '9')
+               key = '1';
+
+       return key;
+}
+
+static int handle_search_keys(int key, size_t start, size_t end, void *_data)
 {
        struct search_data *data = _data;
        struct jump_key *pos;
-       int k = 0;
+       int index = 0;
+
+       if (key < '1' || key > '9')
+               return 0;
 
        list_for_each_entry(pos, data->head, entries) {
-               if (pos->offset >= start && pos->offset < end) {
-                       char header[4];
+               index = next_jump_key(index);
 
-                       if (k < JUMP_NB) {
-                               int key = '0' + (pos->index % JUMP_NB) + 1;
+               if (pos->offset < start)
+                       continue;
 
-                               sprintf(header, "(%c)", key);
-                               data->keys[k] = key;
-                               data->targets[k] = pos->target;
-                               k++;
-                       } else {
-                               sprintf(header, "   ");
-                       }
+               if (pos->offset >= end)
+                       break;
 
-                       memcpy(buf + pos->offset, header, sizeof(header) - 1);
+               if (key == index) {
+                       data->target = pos->target;
+                       return 1;
                }
        }
-       data->keys[k] = 0;
+
+       return 0;
+}
+
+int get_jump_key_char(void)
+{
+       jump_key_char = next_jump_key(jump_key_char);
+
+       return jump_key_char;
 }
 
 static void search_conf(void)
@@ -456,26 +474,23 @@ again:
        sym_arr = sym_re_search(dialog_input);
        do {
                LIST_HEAD(head);
-               struct menu *targets[JUMP_NB];
-               int keys[JUMP_NB + 1], i;
                struct search_data data = {
                        .head = &head,
-                       .targets = targets,
-                       .keys = keys,
                };
                struct jump_key *pos, *tmp;
 
+               jump_key_char = 0;
                res = get_relations_str(sym_arr, &head);
                set_subtitle();
                dres = show_textbox_ext("Search Results", str_get(&res), 0, 0,
-                                       keys, &vscroll, &hscroll, &update_text,
-                                       &data);
+                                       &vscroll, &hscroll,
+                                       handle_search_keys, &data);
                again = false;
-               for (i = 0; i < JUMP_NB && keys[i]; i++)
-                       if (dres == keys[i]) {
-                               conf(targets[i]->parent, targets[i]);
-                               again = true;
-                       }
+               if (dres >= '1' && dres <= '9') {
+                       assert(data.target != NULL);
+                       conf(data.target->parent, data.target);
+                       again = true;
+               }
                str_free(&res);
                list_for_each_entry_safe(pos, tmp, &head, entries)
                        free(pos);
index b90fff8..61c442d 100644 (file)
@@ -701,6 +701,11 @@ static void get_dep_str(struct gstr *r, struct expr *expr, const char *prefix)
        }
 }
 
+int __attribute__((weak)) get_jump_key_char(void)
+{
+       return -1;
+}
+
 static void get_prompt_str(struct gstr *r, struct property *prop,
                           struct list_head *head)
 {
@@ -730,24 +735,27 @@ static void get_prompt_str(struct gstr *r, struct property *prop,
        }
        if (head && location) {
                jump = xmalloc(sizeof(struct jump_key));
-
                jump->target = location;
-
-               if (list_empty(head))
-                       jump->index = 0;
-               else
-                       jump->index = list_entry(head->prev, struct jump_key,
-                                                entries)->index + 1;
-
                list_add_tail(&jump->entries, head);
        }
 
        str_printf(r, "  Location:\n");
-       for (j = 4; --i >= 0; j += 2) {
+       for (j = 0; --i >= 0; j++) {
+               int jk = -1;
+               int indent = 2 * j + 4;
+
                menu = submenu[i];
-               if (jump && menu == location)
+               if (jump && menu == location) {
                        jump->offset = strlen(r->s);
-               str_printf(r, "%*c-> %s", j, ' ', menu_get_prompt(menu));
+                       jk = get_jump_key_char();
+               }
+
+               if (jk >= 0) {
+                       str_printf(r, "(%c)", jk);
+                       indent -= 3;
+               }
+
+               str_printf(r, "%*c-> %s", indent, ' ', menu_get_prompt(menu));
                if (menu->sym) {
                        str_printf(r, " (%s [=%s])", menu->sym->name ?
                                menu->sym->name : "<choice>",
index 3ba8b1a..143a2c3 100644 (file)
@@ -220,7 +220,7 @@ search_help[] =
 "Location:\n"
 "  -> Bus options (PCI, PCMCIA, EISA, ISA)\n"
 "    -> PCI support (PCI [ = y])\n"
-"      -> PCI access mode (<choice> [ = y])\n"
+"(1)   -> PCI access mode (<choice> [ = y])\n"
 "Selects: LIBCRC32\n"
 "Selected by: BAR\n"
 "-----------------------------------------------------------------\n"
@@ -231,9 +231,13 @@ search_help[] =
 "o  The 'Depends on:' line lists symbols that need to be defined for\n"
 "   this symbol to be visible and selectable in the menu.\n"
 "o  The 'Location:' lines tell, where in the menu structure this symbol\n"
-"   is located.  A location followed by a [ = y] indicates that this is\n"
-"   a selectable menu item, and the current value is displayed inside\n"
-"   brackets.\n"
+"   is located.\n"
+"     A location followed by a [ = y] indicates that this is\n"
+"     a selectable menu item, and the current value is displayed inside\n"
+"     brackets.\n"
+"     Press the key in the (#) prefix to jump directly to that\n"
+"     location. You will be returned to the current search results\n"
+"     after exiting this new menu.\n"
 "o  The 'Selects:' line tells, what symbol will be automatically selected\n"
 "   if this symbol is selected (y or m).\n"
 "o  The 'Selected by' line tells what symbol has selected this symbol.\n"
@@ -275,7 +279,9 @@ static const char *current_instructions = menu_instructions;
 
 static char *dialog_input_result;
 static int dialog_input_result_len;
+static int jump_key_char;
 
+static void selected_conf(struct menu *menu, struct menu *active_menu);
 static void conf(struct menu *menu);
 static void conf_choice(struct menu *menu);
 static void conf_string(struct menu *menu);
@@ -685,6 +691,57 @@ static int do_exit(void)
        return 0;
 }
 
+struct search_data {
+       struct list_head *head;
+       struct menu *target;
+};
+
+static int next_jump_key(int key)
+{
+       if (key < '1' || key > '9')
+               return '1';
+
+       key++;
+
+       if (key > '9')
+               key = '1';
+
+       return key;
+}
+
+static int handle_search_keys(int key, size_t start, size_t end, void *_data)
+{
+       struct search_data *data = _data;
+       struct jump_key *pos;
+       int index = 0;
+
+       if (key < '1' || key > '9')
+               return 0;
+
+       list_for_each_entry(pos, data->head, entries) {
+               index = next_jump_key(index);
+
+               if (pos->offset < start)
+                       continue;
+
+               if (pos->offset >= end)
+                       break;
+
+               if (key == index) {
+                       data->target = pos->target;
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+int get_jump_key_char(void)
+{
+       jump_key_char = next_jump_key(jump_key_char);
+
+       return jump_key_char;
+}
 
 static void search_conf(void)
 {
@@ -692,7 +749,8 @@ static void search_conf(void)
        struct gstr res;
        struct gstr title;
        char *dialog_input;
-       int dres;
+       int dres, vscroll = 0, hscroll = 0;
+       bool again;
 
        title = str_new();
        str_printf( &title, "Enter (sub)string or regexp to search for "
@@ -721,11 +779,28 @@ again:
                dialog_input += strlen(CONFIG_);
 
        sym_arr = sym_re_search(dialog_input);
-       res = get_relations_str(sym_arr, NULL);
+
+       do {
+               LIST_HEAD(head);
+               struct search_data data = {
+                       .head = &head,
+                       .target = NULL,
+               };
+               jump_key_char = 0;
+               res = get_relations_str(sym_arr, &head);
+               dres = show_scroll_win_ext(main_window,
+                               "Search Results", str_get(&res),
+                               &vscroll, &hscroll,
+                               handle_search_keys, &data);
+               again = false;
+               if (dres >= '1' && dres <= '9') {
+                       assert(data.target != NULL);
+                       selected_conf(data.target->parent, data.target);
+                       again = true;
+               }
+               str_free(&res);
+       } while (again);
        free(sym_arr);
-       show_scroll_win(main_window,
-                       "Search Results", str_get(&res));
-       str_free(&res);
        str_free(&title);
 }
 
@@ -1063,9 +1138,14 @@ static int do_match(int key, struct match_state *state, int *ans)
 
 static void conf(struct menu *menu)
 {
+       selected_conf(menu, NULL);
+}
+
+static void selected_conf(struct menu *menu, struct menu *active_menu)
+{
        struct menu *submenu = NULL;
        struct symbol *sym;
-       int res;
+       int i, res;
        int current_index = 0;
        int last_top_row = 0;
        struct match_state match_state = {
@@ -1081,6 +1161,19 @@ static void conf(struct menu *menu)
                if (!child_count)
                        break;
 
+               if (active_menu != NULL) {
+                       for (i = 0; i < items_num; i++) {
+                               struct mitem *mcur;
+
+                               mcur = (struct mitem *) item_userptr(curses_menu_items[i]);
+                               if ((struct menu *) mcur->usrptr == active_menu) {
+                                       current_index = i;
+                                       break;
+                               }
+                       }
+                       active_menu = NULL;
+               }
+
                show_menu(menu_get_prompt(menu), menu_instructions,
                          current_index, &last_top_row);
                keypad((menu_win(curses_menu)), TRUE);
index 9aedf40..25a7263 100644 (file)
@@ -497,11 +497,18 @@ void refresh_all_windows(WINDOW *main_window)
        refresh();
 }
 
-/* layman's scrollable window... */
 void show_scroll_win(WINDOW *main_window,
                const char *title,
                const char *text)
 {
+       (void)show_scroll_win_ext(main_window, title, (char *)text, NULL, NULL, NULL, NULL);
+}
+
+/* layman's scrollable window... */
+int show_scroll_win_ext(WINDOW *main_window, const char *title, char *text,
+                       int *vscroll, int *hscroll,
+                       extra_key_cb_fn extra_key_cb, void *data)
+{
        int res;
        int total_lines = get_line_no(text);
        int x, y, lines, columns;
@@ -514,6 +521,12 @@ void show_scroll_win(WINDOW *main_window,
        WINDOW *win;
        WINDOW *pad;
        PANEL *panel;
+       bool done = false;
+
+       if (hscroll)
+               start_x = *hscroll;
+       if (vscroll)
+               start_y = *vscroll;
 
        getmaxyx(stdscr, lines, columns);
 
@@ -549,8 +562,7 @@ void show_scroll_win(WINDOW *main_window,
        panel = new_panel(win);
 
        /* handle scrolling */
-       do {
-
+       while (!done) {
                copywin(pad, win, start_y, start_x, 2, 2, text_lines,
                                text_cols, 0);
                print_in_middle(win,
@@ -593,8 +605,18 @@ void show_scroll_win(WINDOW *main_window,
                case 'l':
                        start_x++;
                        break;
+               default:
+                       if (extra_key_cb) {
+                               size_t start = (get_line(text, start_y) - text);
+                               size_t end = (get_line(text, start_y + text_lines) - text);
+
+                               if (extra_key_cb(res, start, end, data)) {
+                                       done = true;
+                                       break;
+                               }
+                       }
                }
-               if (res == 10 || res == 27 || res == 'q' ||
+               if (res == 0 || res == 10 || res == 27 || res == 'q' ||
                        res == KEY_F(F_HELP) || res == KEY_F(F_BACK) ||
                        res == KEY_F(F_EXIT))
                        break;
@@ -606,9 +628,14 @@ void show_scroll_win(WINDOW *main_window,
                        start_x = 0;
                if (start_x >= total_cols-text_cols)
                        start_x = total_cols-text_cols;
-       } while (res);
+       }
 
+       if (hscroll)
+               *hscroll = start_x;
+       if (vscroll)
+               *vscroll = start_y;
        del_panel(panel);
        delwin(win);
        refresh_all_windows(main_window);
+       return res;
 }
index 6f925bc..ab836d5 100644 (file)
@@ -67,6 +67,8 @@ typedef enum {
 
 void set_colors(void);
 
+typedef int (*extra_key_cb_fn)(int, size_t, size_t, void *);
+
 /* this changes the windows attributes !!! */
 void print_in_middle(WINDOW *win, int y, int width, const char *str, int attrs);
 int get_line_length(const char *line);
@@ -78,6 +80,9 @@ int dialog_inputbox(WINDOW *main_window,
                const char *title, const char *prompt,
                const char *init, char **resultp, int *result_len);
 void refresh_all_windows(WINDOW *main_window);
+int show_scroll_win_ext(WINDOW *main_window, const char *title, char *text,
+                       int *vscroll, int *hscroll,
+                       extra_key_cb_fn extra_key_cb, void *data);
 void show_scroll_win(WINDOW *main_window,
                const char *title,
                const char *text);
index 748da57..d1f5bcf 100644 (file)
@@ -396,6 +396,9 @@ static char *eval_clause(const char *str, size_t len, int argc, char *argv[])
 
                p++;
        }
+
+       if (new_argc >= FUNCTION_MAX_ARGS)
+               pperror("too many function arguments");
        new_argv[new_argc++] = prev;
 
        /*
index 117f36e..0e113b0 100755 (executable)
@@ -5,7 +5,8 @@ cflags=$1
 libs=$2
 bin=$3
 
-PKG="Qt5Core Qt5Gui Qt5Widgets"
+PKG5="Qt5Core Qt5Gui Qt5Widgets"
+PKG6="Qt6Core Qt6Gui Qt6Widgets"
 
 if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
        echo >&2 "*"
@@ -14,16 +15,26 @@ if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
        exit 1
 fi
 
-if ${HOSTPKG_CONFIG} --exists $PKG; then
-       ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags}
-       ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs}
+if ${HOSTPKG_CONFIG} --exists $PKG6; then
+       ${HOSTPKG_CONFIG} --cflags ${PKG6} > ${cflags}
+       # Qt6 requires C++17.
+       echo -std=c++17 >> ${cflags}
+       ${HOSTPKG_CONFIG} --libs ${PKG6} > ${libs}
+       ${HOSTPKG_CONFIG} --variable=libexecdir Qt6Core > ${bin}
+       exit 0
+fi
+
+if ${HOSTPKG_CONFIG} --exists $PKG5; then
+       ${HOSTPKG_CONFIG} --cflags ${PKG5} > ${cflags}
+       ${HOSTPKG_CONFIG} --libs ${PKG5} > ${libs}
        ${HOSTPKG_CONFIG} --variable=host_bins Qt5Core > ${bin}
        exit 0
 fi
 
 echo >&2 "*"
-echo >&2 "* Could not find Qt5 via ${HOSTPKG_CONFIG}."
-echo >&2 "* Please install Qt5 and make sure it's in PKG_CONFIG_PATH"
-echo >&2 "* You need $PKG"
+echo >&2 "* Could not find Qt6 or Qt5 via ${HOSTPKG_CONFIG}."
+echo >&2 "* Please install Qt6 or Qt5 and make sure it's in PKG_CONFIG_PATH"
+echo >&2 "* You need $PKG6 for Qt6"
+echo >&2 "* You need $PKG5 for Qt5"
 echo >&2 "*"
 exit 1
index 78087b2..620a352 100644 (file)
@@ -5,10 +5,10 @@
  */
 
 #include <QAction>
+#include <QActionGroup>
 #include <QApplication>
 #include <QCloseEvent>
 #include <QDebug>
-#include <QDesktopWidget>
 #include <QFileDialog>
 #include <QLabel>
 #include <QLayout>
@@ -16,6 +16,8 @@
 #include <QMenu>
 #include <QMenuBar>
 #include <QMessageBox>
+#include <QRegularExpression>
+#include <QScreen>
 #include <QToolBar>
 
 #include <stdlib.h>
@@ -1126,7 +1128,7 @@ QString ConfigInfoView::debug_info(struct symbol *sym)
 
 QString ConfigInfoView::print_filter(const QString &str)
 {
-       QRegExp re("[<>&\"\\n]");
+       QRegularExpression re("[<>&\"\\n]");
        QString res = str;
        for (int i = 0; (i = res.indexOf(re, i)) >= 0;) {
                switch (res[i].toLatin1()) {
@@ -1322,15 +1324,15 @@ ConfigMainWindow::ConfigMainWindow(void)
        int width, height;
        char title[256];
 
-       QDesktopWidget *d = configApp->desktop();
        snprintf(title, sizeof(title), "%s%s",
                rootmenu.prompt->text,
                ""
                );
        setWindowTitle(title);
 
-       width = configSettings->value("/window width", d->width() - 64).toInt();
-       height = configSettings->value("/window height", d->height() - 64).toInt();
+       QRect g = configApp->primaryScreen()->geometry();
+       width = configSettings->value("/window width", g.width() - 64).toInt();
+       height = configSettings->value("/window height", g.height() - 64).toInt();
        resize(width, height);
        x = configSettings->value("/window x");
        y = configSettings->value("/window y");
@@ -1379,17 +1381,17 @@ ConfigMainWindow::ConfigMainWindow(void)
                this, &ConfigMainWindow::goBack);
 
        QAction *quitAction = new QAction("&Quit", this);
-       quitAction->setShortcut(Qt::CTRL + Qt::Key_Q);
+       quitAction->setShortcut(Qt::CTRL | Qt::Key_Q);
        connect(quitAction, &QAction::triggered,
                this, &ConfigMainWindow::close);
 
        QAction *loadAction = new QAction(QPixmap(xpm_load), "&Load", this);
-       loadAction->setShortcut(Qt::CTRL + Qt::Key_L);
+       loadAction->setShortcut(Qt::CTRL | Qt::Key_L);
        connect(loadAction, &QAction::triggered,
                this, &ConfigMainWindow::loadConfig);
 
        saveAction = new QAction(QPixmap(xpm_save), "&Save", this);
-       saveAction->setShortcut(Qt::CTRL + Qt::Key_S);
+       saveAction->setShortcut(Qt::CTRL | Qt::Key_S);
        connect(saveAction, &QAction::triggered,
                this, &ConfigMainWindow::saveConfig);
 
@@ -1403,7 +1405,7 @@ ConfigMainWindow::ConfigMainWindow(void)
        connect(saveAsAction, &QAction::triggered,
                this, &ConfigMainWindow::saveConfigAs);
        QAction *searchAction = new QAction("&Find", this);
-       searchAction->setShortcut(Qt::CTRL + Qt::Key_F);
+       searchAction->setShortcut(Qt::CTRL | Qt::Key_F);
        connect(searchAction, &QAction::triggered,
                this, &ConfigMainWindow::searchConfig);
        singleViewAction = new QAction(QPixmap(xpm_single_view), "Single View", this);
@@ -1750,11 +1752,21 @@ void ConfigMainWindow::closeEvent(QCloseEvent* e)
                e->accept();
                return;
        }
-       QMessageBox mb("qconf", "Save configuration?", QMessageBox::Warning,
-                       QMessageBox::Yes | QMessageBox::Default, QMessageBox::No, QMessageBox::Cancel | QMessageBox::Escape);
-       mb.setButtonText(QMessageBox::Yes, "&Save Changes");
-       mb.setButtonText(QMessageBox::No, "&Discard Changes");
-       mb.setButtonText(QMessageBox::Cancel, "Cancel Exit");
+
+       QMessageBox mb(QMessageBox::Icon::Warning, "qconf",
+                      "Save configuration?");
+
+       QPushButton *yb = mb.addButton(QMessageBox::Yes);
+       QPushButton *db = mb.addButton(QMessageBox::No);
+       QPushButton *cb = mb.addButton(QMessageBox::Cancel);
+
+       yb->setText("&Save Changes");
+       db->setText("&Discard Changes");
+       cb->setText("Cancel Exit");
+
+       mb.setDefaultButton(yb);
+       mb.setEscapeButton(cb);
+
        switch (mb.exec()) {
        case QMessageBox::Yes:
                if (saveConfig())
index b29b297..34a5386 100644 (file)
@@ -24,6 +24,7 @@
 #include "../../include/linux/license.h"
 #include "../../include/linux/module_symbol.h"
 
+static bool module_enabled;
 /* Are we using CONFIG_MODVERSIONS? */
 static bool modversions;
 /* Is CONFIG_MODULE_SRCVERSION_ALL set? */
@@ -761,6 +762,7 @@ static const char *const section_white_list[] =
        ".fmt_slot*",                   /* EZchip */
        ".gnu.lto*",
        ".discard.*",
+       ".llvm.call-graph-profile",     /* call graph */
        NULL
 };
 
@@ -1242,7 +1244,7 @@ static void check_section_mismatch(struct module *mod, struct elf_info *elf,
        const char *tosec = sec_name(elf, get_secindex(elf, sym));
        const struct sectioncheck *mismatch;
 
-       if (elf->export_symbol_secndx == fsecndx) {
+       if (module_enabled && elf->export_symbol_secndx == fsecndx) {
                check_export_symbol(mod, elf, faddr, tosec, sym);
                return;
        }
@@ -1256,21 +1258,16 @@ static void check_section_mismatch(struct module *mod, struct elf_info *elf,
                                 tosec, taddr);
 }
 
-static int addend_386_rel(uint32_t *location, Elf_Rela *r)
+static Elf_Addr addend_386_rel(uint32_t *location, unsigned int r_type)
 {
-       unsigned int r_typ = ELF_R_TYPE(r->r_info);
-
-       switch (r_typ) {
+       switch (r_type) {
        case R_386_32:
-               r->r_addend = TO_NATIVE(*location);
-               break;
+               return TO_NATIVE(*location);
        case R_386_PC32:
-               r->r_addend = TO_NATIVE(*location) + 4;
-               break;
-       default:
-               r->r_addend = (Elf_Addr)(-1);
+               return TO_NATIVE(*location) + 4;
        }
-       return 0;
+
+       return (Elf_Addr)(-1);
 }
 
 #ifndef R_ARM_CALL
@@ -1314,32 +1311,28 @@ static int32_t sign_extend32(int32_t value, int index)
        return (int32_t)(value << shift) >> shift;
 }
 
-static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
+static Elf_Addr addend_arm_rel(void *loc, Elf_Sym *sym, unsigned int r_type)
 {
-       unsigned int r_typ = ELF_R_TYPE(r->r_info);
        uint32_t inst, upper, lower, sign, j1, j2;
        int32_t offset;
 
-       switch (r_typ) {
+       switch (r_type) {
        case R_ARM_ABS32:
        case R_ARM_REL32:
                inst = TO_NATIVE(*(uint32_t *)loc);
-               r->r_addend = inst + sym->st_value;
-               break;
+               return inst + sym->st_value;
        case R_ARM_MOVW_ABS_NC:
        case R_ARM_MOVT_ABS:
                inst = TO_NATIVE(*(uint32_t *)loc);
                offset = sign_extend32(((inst & 0xf0000) >> 4) | (inst & 0xfff),
                                       15);
-               r->r_addend = offset + sym->st_value;
-               break;
+               return offset + sym->st_value;
        case R_ARM_PC24:
        case R_ARM_CALL:
        case R_ARM_JUMP24:
                inst = TO_NATIVE(*(uint32_t *)loc);
                offset = sign_extend32((inst & 0x00ffffff) << 2, 25);
-               r->r_addend = offset + sym->st_value + 8;
-               break;
+               return offset + sym->st_value + 8;
        case R_ARM_THM_MOVW_ABS_NC:
        case R_ARM_THM_MOVT_ABS:
                upper = TO_NATIVE(*(uint16_t *)loc);
@@ -1349,8 +1342,7 @@ static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
                                       ((lower & 0x7000) >> 4) |
                                       (lower & 0x00ff),
                                       15);
-               r->r_addend = offset + sym->st_value;
-               break;
+               return offset + sym->st_value;
        case R_ARM_THM_JUMP19:
                /*
                 * Encoding T3:
@@ -1371,8 +1363,7 @@ static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
                                       ((upper & 0x03f) << 12) |
                                       ((lower & 0x07ff) << 1),
                                       20);
-               r->r_addend = offset + sym->st_value + 4;
-               break;
+               return offset + sym->st_value + 4;
        case R_ARM_THM_CALL:
        case R_ARM_THM_JUMP24:
                /*
@@ -1398,34 +1389,26 @@ static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
                                       ((upper & 0x03ff) << 12) |
                                       ((lower & 0x07ff) << 1),
                                       24);
-               r->r_addend = offset + sym->st_value + 4;
-               break;
-       default:
-               r->r_addend = (Elf_Addr)(-1);
+               return offset + sym->st_value + 4;
        }
-       return 0;
+
+       return (Elf_Addr)(-1);
 }
 
-static int addend_mips_rel(uint32_t *location, Elf_Rela *r)
+static Elf_Addr addend_mips_rel(uint32_t *location, unsigned int r_type)
 {
-       unsigned int r_typ = ELF_R_TYPE(r->r_info);
        uint32_t inst;
 
        inst = TO_NATIVE(*location);
-       switch (r_typ) {
+       switch (r_type) {
        case R_MIPS_LO16:
-               r->r_addend = inst & 0xffff;
-               break;
+               return inst & 0xffff;
        case R_MIPS_26:
-               r->r_addend = (inst & 0x03ffffff) << 2;
-               break;
+               return (inst & 0x03ffffff) << 2;
        case R_MIPS_32:
-               r->r_addend = inst;
-               break;
-       default:
-               r->r_addend = (Elf_Addr)(-1);
+               return inst;
        }
-       return 0;
+       return (Elf_Addr)(-1);
 }
 
 #ifndef EM_RISCV
@@ -1444,12 +1427,45 @@ static int addend_mips_rel(uint32_t *location, Elf_Rela *r)
 #define R_LARCH_SUB32          55
 #endif
 
+static void get_rel_type_and_sym(struct elf_info *elf, uint64_t r_info,
+                                unsigned int *r_type, unsigned int *r_sym)
+{
+       typedef struct {
+               Elf64_Word    r_sym;    /* Symbol index */
+               unsigned char r_ssym;   /* Special symbol for 2nd relocation */
+               unsigned char r_type3;  /* 3rd relocation type */
+               unsigned char r_type2;  /* 2nd relocation type */
+               unsigned char r_type;   /* 1st relocation type */
+       } Elf64_Mips_R_Info;
+
+       bool is_64bit = (elf->hdr->e_ident[EI_CLASS] == ELFCLASS64);
+
+       if (elf->hdr->e_machine == EM_MIPS && is_64bit) {
+               Elf64_Mips_R_Info *mips64_r_info = (void *)&r_info;
+
+               *r_type = mips64_r_info->r_type;
+               *r_sym = TO_NATIVE(mips64_r_info->r_sym);
+               return;
+       }
+
+       if (is_64bit) {
+               Elf64_Xword r_info64 = r_info;
+
+               r_info = TO_NATIVE(r_info64);
+       } else {
+               Elf32_Word r_info32 = r_info;
+
+               r_info = TO_NATIVE(r_info32);
+       }
+
+       *r_type = ELF_R_TYPE(r_info);
+       *r_sym = ELF_R_SYM(r_info);
+}
+
 static void section_rela(struct module *mod, struct elf_info *elf,
                         Elf_Shdr *sechdr)
 {
        Elf_Rela *rela;
-       Elf_Rela r;
-       unsigned int r_sym;
        unsigned int fsecndx = sechdr->sh_info;
        const char *fromsec = sec_name(elf, fsecndx);
        Elf_Rela *start = (void *)elf->hdr + sechdr->sh_offset;
@@ -1460,38 +1476,29 @@ static void section_rela(struct module *mod, struct elf_info *elf,
                return;
 
        for (rela = start; rela < stop; rela++) {
-               r.r_offset = TO_NATIVE(rela->r_offset);
-#if KERNEL_ELFCLASS == ELFCLASS64
-               if (elf->hdr->e_machine == EM_MIPS) {
-                       unsigned int r_typ;
-                       r_sym = ELF64_MIPS_R_SYM(rela->r_info);
-                       r_sym = TO_NATIVE(r_sym);
-                       r_typ = ELF64_MIPS_R_TYPE(rela->r_info);
-                       r.r_info = ELF64_R_INFO(r_sym, r_typ);
-               } else {
-                       r.r_info = TO_NATIVE(rela->r_info);
-                       r_sym = ELF_R_SYM(r.r_info);
-               }
-#else
-               r.r_info = TO_NATIVE(rela->r_info);
-               r_sym = ELF_R_SYM(r.r_info);
-#endif
-               r.r_addend = TO_NATIVE(rela->r_addend);
+               Elf_Addr taddr, r_offset;
+               unsigned int r_type, r_sym;
+
+               r_offset = TO_NATIVE(rela->r_offset);
+               get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym);
+
+               taddr = TO_NATIVE(rela->r_addend);
+
                switch (elf->hdr->e_machine) {
                case EM_RISCV:
                        if (!strcmp("__ex_table", fromsec) &&
-                           ELF_R_TYPE(r.r_info) == R_RISCV_SUB32)
+                           r_type == R_RISCV_SUB32)
                                continue;
                        break;
                case EM_LOONGARCH:
                        if (!strcmp("__ex_table", fromsec) &&
-                           ELF_R_TYPE(r.r_info) == R_LARCH_SUB32)
+                           r_type == R_LARCH_SUB32)
                                continue;
                        break;
                }
 
                check_section_mismatch(mod, elf, elf->symtab_start + r_sym,
-                                      fsecndx, fromsec, r.r_offset, r.r_addend);
+                                      fsecndx, fromsec, r_offset, taddr);
        }
 }
 
@@ -1499,8 +1506,6 @@ static void section_rel(struct module *mod, struct elf_info *elf,
                        Elf_Shdr *sechdr)
 {
        Elf_Rel *rel;
-       Elf_Rela r;
-       unsigned int r_sym;
        unsigned int fsecndx = sechdr->sh_info;
        const char *fromsec = sec_name(elf, fsecndx);
        Elf_Rel *start = (void *)elf->hdr + sechdr->sh_offset;
@@ -1512,45 +1517,32 @@ static void section_rel(struct module *mod, struct elf_info *elf,
 
        for (rel = start; rel < stop; rel++) {
                Elf_Sym *tsym;
+               Elf_Addr taddr = 0, r_offset;
+               unsigned int r_type, r_sym;
                void *loc;
 
-               r.r_offset = TO_NATIVE(rel->r_offset);
-#if KERNEL_ELFCLASS == ELFCLASS64
-               if (elf->hdr->e_machine == EM_MIPS) {
-                       unsigned int r_typ;
-                       r_sym = ELF64_MIPS_R_SYM(rel->r_info);
-                       r_sym = TO_NATIVE(r_sym);
-                       r_typ = ELF64_MIPS_R_TYPE(rel->r_info);
-                       r.r_info = ELF64_R_INFO(r_sym, r_typ);
-               } else {
-                       r.r_info = TO_NATIVE(rel->r_info);
-                       r_sym = ELF_R_SYM(r.r_info);
-               }
-#else
-               r.r_info = TO_NATIVE(rel->r_info);
-               r_sym = ELF_R_SYM(r.r_info);
-#endif
-               r.r_addend = 0;
+               r_offset = TO_NATIVE(rel->r_offset);
+               get_rel_type_and_sym(elf, rel->r_info, &r_type, &r_sym);
 
-               loc = sym_get_data_by_offset(elf, fsecndx, r.r_offset);
+               loc = sym_get_data_by_offset(elf, fsecndx, r_offset);
                tsym = elf->symtab_start + r_sym;
 
                switch (elf->hdr->e_machine) {
                case EM_386:
-                       addend_386_rel(loc, &r);
+                       taddr = addend_386_rel(loc, r_type);
                        break;
                case EM_ARM:
-                       addend_arm_rel(loc, tsym, &r);
+                       taddr = addend_arm_rel(loc, tsym, r_type);
                        break;
                case EM_MIPS:
-                       addend_mips_rel(loc, &r);
+                       taddr = addend_mips_rel(loc, r_type);
                        break;
                default:
                        fatal("Please add code to calculate addend for this architecture\n");
                }
 
                check_section_mismatch(mod, elf, tsym,
-                                      fsecndx, fromsec, r.r_offset, r.r_addend);
+                                      fsecndx, fromsec, r_offset, taddr);
        }
 }
 
@@ -2272,7 +2264,7 @@ int main(int argc, char **argv)
        LIST_HEAD(dump_lists);
        struct dump_list *dl, *dl2;
 
-       while ((opt = getopt(argc, argv, "ei:mnT:to:au:WwENd:")) != -1) {
+       while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:")) != -1) {
                switch (opt) {
                case 'e':
                        external_module = true;
@@ -2282,6 +2274,9 @@ int main(int argc, char **argv)
                        dl->file = optarg;
                        list_add_tail(&dl->list, &dump_lists);
                        break;
+               case 'M':
+                       module_enabled = true;
+                       break;
                case 'm':
                        modversions = true;
                        break;
index dfdb948..5f94c2c 100644 (file)
 #define ELF_R_TYPE  ELF64_R_TYPE
 #endif
 
-/* The 64-bit MIPS ELF ABI uses an unusual reloc format. */
-typedef struct
-{
-       Elf32_Word    r_sym;    /* Symbol index */
-       unsigned char r_ssym;   /* Special symbol for 2nd relocation */
-       unsigned char r_type3;  /* 3rd relocation type */
-       unsigned char r_type2;  /* 2nd relocation type */
-       unsigned char r_type1;  /* 1st relocation type */
-} _Elf64_Mips_R_Info;
-
-typedef union
-{
-       Elf64_Xword             r_info_number;
-       _Elf64_Mips_R_Info      r_info_fields;
-} _Elf64_Mips_R_Info_union;
-
-#define ELF64_MIPS_R_SYM(i) \
-  ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_sym)
-
-#define ELF64_MIPS_R_TYPE(i) \
-  ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type1)
-
 #if KERNEL_ELFDATA != HOST_ELFDATA
 
 static inline void __endian(const void *src, void *dest, unsigned int size)
index 032774e..bf3f856 100755 (executable)
@@ -162,34 +162,7 @@ install_kernel_headers () {
 
        rm -rf $pdir
 
-       (
-               cd $srctree
-               find . arch/$SRCARCH -maxdepth 1 -name Makefile\*
-               find include scripts -type f -o -type l
-               find arch/$SRCARCH -name Kbuild.platforms -o -name Platform
-               find $(find arch/$SRCARCH -name include -o -name scripts -type d) -type f
-       ) > debian/hdrsrcfiles
-
-       {
-               if is_enabled CONFIG_OBJTOOL; then
-                       echo tools/objtool/objtool
-               fi
-
-               find arch/$SRCARCH/include Module.symvers include scripts -type f
-
-               if is_enabled CONFIG_GCC_PLUGINS; then
-                       find scripts/gcc-plugins -name \*.so
-               fi
-       } > debian/hdrobjfiles
-
-       destdir=$pdir/usr/src/linux-headers-$version
-       mkdir -p $destdir
-       tar -c -f - -C $srctree -T debian/hdrsrcfiles | tar -xf - -C $destdir
-       tar -c -f - -T debian/hdrobjfiles | tar -xf - -C $destdir
-       rm -f debian/hdrsrcfiles debian/hdrobjfiles
-
-       # copy .config manually to be where it's expected to be
-       cp $KCONFIG_CONFIG $destdir/.config
+       "${srctree}/scripts/package/install-extmod-build" "${pdir}/usr/src/linux-headers-${version}"
 
        mkdir -p $pdir/lib/modules/$version/
        ln -s /usr/src/linux-headers-$version $pdir/lib/modules/$version/build
diff --git a/scripts/package/debian/rules b/scripts/package/debian/rules
new file mode 100755 (executable)
index 0000000..3dafa94
--- /dev/null
@@ -0,0 +1,33 @@
+#!/usr/bin/make -f
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Debian packaging rules: build the kernel and produce the binary packages.
+
+# ARCH and KERNELRELEASE are expected to come from this file.
+include debian/rules.vars
+
+srctree ?= .
+
+# Honor parallel=N in DEB_BUILD_OPTIONS (for any N other than 1) by adding
+# -jN to MAKEFLAGS.
+ifneq (,$(filter-out parallel=1,$(filter parallel=%,$(DEB_BUILD_OPTIONS))))
+    NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
+    MAKEFLAGS += -j$(NUMJOBS)
+endif
+
+# binary-arch runs scripts/package/builddeb through the run-command target of
+# the kernel Makefile; binary-indep has no recipe of its own.
+.PHONY: binary binary-indep binary-arch
+binary: binary-arch binary-indep
+binary-indep: build-indep
+binary-arch: build-arch
+       $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \
+       KERNELRELEASE=$(KERNELRELEASE) \
+       run-command KBUILD_RUN_COMMAND=+$(srctree)/scripts/package/builddeb
+
+# build-arch refreshes the configuration with olddefconfig and then builds
+# 'all'; the output of scripts/package/deb-build-option is passed to make as
+# extra arguments. build-indep has no recipe.
+.PHONY: build build-indep build-arch
+build: build-arch build-indep
+build-indep:
+build-arch:
+       $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \
+       KERNELRELEASE=$(KERNELRELEASE) \
+       $(shell $(srctree)/scripts/package/deb-build-option) \
+       olddefconfig all
+
+# Remove the packaging output under debian/ and run the kernel's own 'clean'.
+.PHONY: clean
+clean:
+       rm -rf debian/files debian/linux-*
+       $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) clean
diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build
new file mode 100755 (executable)
index 0000000..af7fe9f
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+set -e
+
+destdir=${1}
+
+test -n "${srctree}"
+test -n "${SRCARCH}"
+
+is_enabled() {
+       grep -q "^$1=y" include/config/auto.conf
+}
+
+mkdir -p "${destdir}"
+
+(
+       cd "${srctree}"
+       echo Makefile
+       find "arch/${SRCARCH}" -maxdepth 1 -name 'Makefile*'
+       find include scripts -type f -o -type l
+       find "arch/${SRCARCH}" -name Kbuild.platforms -o -name Platform
+       find "$(find "arch/${SRCARCH}" -name include -o -name scripts -type d)" -type f
+) | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${destdir}"
+
+{
+       if is_enabled CONFIG_OBJTOOL; then
+               echo tools/objtool/objtool
+       fi
+
+       find "arch/${SRCARCH}/include" Module.symvers include scripts -type f
+
+       if is_enabled CONFIG_GCC_PLUGINS; then
+               find scripts/gcc-plugins -name '*.so'
+       fi
+} | tar -c -f - -T - | tar -xf - -C "${destdir}"
+
+# copy .config manually to be where it's expected to be
+cp "${KCONFIG_CONFIG}" "${destdir}/.config"
diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec
new file mode 100644 (file)
index 0000000..ac3f2ee
--- /dev/null
@@ -0,0 +1,117 @@
+# RPM spec for the kernel, kernel-headers and (when with_devel is non-zero)
+# kernel-devel packages.
+#
+# The ARCH, KERNELRELEASE, pkg_release and with_devel macros are used below
+# but not defined in this file; they must be defined before it is processed.
+#
+# _arch is undefined if /usr/lib/rpm/platform/*/macros was not included.
+%{!?_arch: %define _arch dummy}
+%{!?make: %define make make}
+%define makeflags %{?_smp_mflags} ARCH=%{ARCH}
+%define __spec_install_post /usr/lib/rpm/brp-compress || :
+%define debug_package %{nil}
+
+Name: kernel
+Summary: The Linux Kernel
+Version: %(echo %{KERNELRELEASE} | sed -e 's/-/_/g')
+Release: %{pkg_release}
+License: GPL
+Group: System Environment/Kernel
+Vendor: The Linux Community
+URL: https://www.kernel.org
+Source0: linux.tar.gz
+Source1: config
+Source2: diff.patch
+Provides: kernel-%{KERNELRELEASE}
+BuildRequires: bc binutils bison dwarves
+BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
+BuildRequires: gcc make openssl openssl-devel perl python3 rsync
+
+%description
+The Linux Kernel, the operating system core itself
+
+%package headers
+Summary: Header files for the Linux kernel for use by glibc
+Group: Development/System
+Obsoletes: kernel-headers
+Provides: kernel-headers = %{version}
+%description headers
+Kernel-headers includes the C header files that specify the interface
+between the Linux kernel and userspace libraries and programs.  The
+header files define structures and constants that are needed for
+building most standard programs and are also needed for rebuilding the
+glibc package.
+
+%if %{with_devel}
+%package devel
+Summary: Development package for building kernel modules to match the %{version} kernel
+Group: System Environment/Kernel
+AutoReqProv: no
+%description -n kernel-devel
+This package provides kernel headers and makefiles sufficient to build modules
+against the %{version} kernel package.
+%endif
+
+%prep
+%setup -q -n linux
+cp %{SOURCE1} .config
+patch -p1 < %{SOURCE2}
+
+%build
+%{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release}
+
+%install
+mkdir -p %{buildroot}/boot
+%ifarch ia64
+mkdir -p %{buildroot}/boot/efi
+cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/efi/vmlinuz-%{KERNELRELEASE}
+ln -s efi/vmlinuz-%{KERNELRELEASE} %{buildroot}/boot/
+%else
+cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE}
+%endif
+%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install
+%{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
+cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE}
+cp .config %{buildroot}/boot/config-%{KERNELRELEASE}
+# Point the build and source links at the directory shipped by the devel
+# package; -f replaces any link that is already there.
+ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build
+ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/source
+%if %{with_devel}
+%{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}'
+%endif
+
+%clean
+rm -rf %{buildroot}
+
+%post
+# When installkernel is available: copy the image and System.map to temporary
+# dot-files, remove the packaged copies, let installkernel install from the
+# temporaries, then delete the temporaries.
+if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then
+cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm
+cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm
+rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE}
+/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
+rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
+fi
+
+%preun
+# Unregister the kernel with whichever of the two helper tools is installed.
+if [ -x /sbin/new-kernel-pkg ]; then
+new-kernel-pkg --remove %{KERNELRELEASE} --rminitrd --initrdfile=/boot/initramfs-%{KERNELRELEASE}.img
+elif [ -x /usr/bin/kernel-install ]; then
+kernel-install remove %{KERNELRELEASE}
+fi
+
+%postun
+if [ -x /sbin/update-bootloader ]; then
+/sbin/update-bootloader --remove %{KERNELRELEASE}
+fi
+
+%files
+%defattr (-, root, root)
+/lib/modules/%{KERNELRELEASE}
+%exclude /lib/modules/%{KERNELRELEASE}/build
+%exclude /lib/modules/%{KERNELRELEASE}/source
+/boot/*
+
+%files headers
+%defattr (-, root, root)
+/usr/include
+
+%if %{with_devel}
+%files devel
+%defattr (-, root, root)
+/usr/src/kernels/%{KERNELRELEASE}
+/lib/modules/%{KERNELRELEASE}/build
+/lib/modules/%{KERNELRELEASE}/source
+%endif
index ba2453e..5044224 100755 (executable)
@@ -263,34 +263,11 @@ Description: Linux kernel debugging symbols for $version
 EOF
 fi
 
-cat <<EOF > debian/rules
-#!$(command -v $MAKE) -f
-
-srctree ?= .
-KERNELRELEASE = ${KERNELRELEASE}
-
-.PHONY: clean build build-arch build-indep binary binary-arch binary-indep
-
-build-indep:
-build-arch:
-       \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \
-       KERNELRELEASE=\$(KERNELRELEASE) \
-       \$(shell \$(srctree)/scripts/package/deb-build-option) \
-       olddefconfig all
-
-build: build-arch
-
-binary-indep:
-binary-arch: build-arch
-       \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \
-       KERNELRELEASE=\$(KERNELRELEASE) intdeb-pkg
-
-clean:
-       rm -rf debian/files debian/linux-*
-       \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} clean
-
-binary: binary-arch
+cat <<EOF > debian/rules.vars
+ARCH := ${ARCH}
+KERNELRELEASE := ${KERNELRELEASE}
 EOF
-chmod +x debian/rules
+
+cp "${srctree}/scripts/package/debian/rules" debian/
 
 exit 0
index 8049f0e..d41608e 100755 (executable)
 #      Patched for non-x86 by Opencon (L) 2002 <opencon@rio.skydome.net>
 #
 
-# how we were called determines which rpms we build and how we build them
-if [ "$1" = prebuilt ]; then
-       S=DEL
-       MAKE="$MAKE -f $srctree/Makefile"
-else
-       S=
-
-       mkdir -p rpmbuild/SOURCES
-       cp linux.tar.gz rpmbuild/SOURCES
-       cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config
-       "${srctree}/scripts/package/gen-diff-patch" rpmbuild/SOURCES/diff.patch
-fi
-
 if grep -q CONFIG_MODULES=y include/config/auto.conf; then
-       M=
+echo '%define with_devel %{?_without_devel: 0} %{?!_without_devel: 1}'
 else
-       M=DEL
+echo '%define with_devel 0'
 fi
 
-__KERNELRELEASE=$(echo $KERNELRELEASE | sed -e "s/-/_/g")
-EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \
---exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \
---exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s"
-
-# We can label the here-doc lines for conditional output to the spec file
-#
-# Labels:
-#  $S: this line is enabled only when building source package
-#  $M: this line is enabled only when CONFIG_MODULES is enabled
-sed -e '/^DEL/d' -e 's/^\t*//' <<EOF
-       Name: kernel
-       Summary: The Linux Kernel
-       Version: $__KERNELRELEASE
-       Release: $(cat .version 2>/dev/null || echo 1)
-       License: GPL
-       Group: System Environment/Kernel
-       Vendor: The Linux Community
-       URL: https://www.kernel.org
-$S     Source0: linux.tar.gz
-$S     Source1: config
-$S     Source2: diff.patch
-       Provides: kernel-$KERNELRELEASE
-$S     BuildRequires: bc binutils bison dwarves
-$S     BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
-$S     BuildRequires: gcc make openssl openssl-devel perl python3 rsync
-
-       # $UTS_MACHINE as a fallback of _arch in case
-       # /usr/lib/rpm/platform/*/macros was not included.
-       %define _arch %{?_arch:$UTS_MACHINE}
-       %define __spec_install_post /usr/lib/rpm/brp-compress || :
-       %define debug_package %{nil}
-
-       %description
-       The Linux Kernel, the operating system core itself
-
-       %package headers
-       Summary: Header files for the Linux kernel for use by glibc
-       Group: Development/System
-       Obsoletes: kernel-headers
-       Provides: kernel-headers = %{version}
-       %description headers
-       Kernel-headers includes the C header files that specify the interface
-       between the Linux kernel and userspace libraries and programs.  The
-       header files define structures and constants that are needed for
-       building most standard programs and are also needed for rebuilding the
-       glibc package.
-
-$S$M   %package devel
-$S$M   Summary: Development package for building kernel modules to match the $__KERNELRELEASE kernel
-$S$M   Group: System Environment/Kernel
-$S$M   AutoReqProv: no
-$S$M   %description -n kernel-devel
-$S$M   This package provides kernel headers and makefiles sufficient to build modules
-$S$M   against the $__KERNELRELEASE kernel package.
-$S$M
-$S     %prep
-$S     %setup -q -n linux
-$S     cp %{SOURCE1} .config
-$S     patch -p1 < %{SOURCE2}
-$S
-$S     %build
-$S     $MAKE %{?_smp_mflags} KERNELRELEASE=$KERNELRELEASE KBUILD_BUILD_VERSION=%{release}
-$S
-       %install
-       mkdir -p %{buildroot}/boot
-       %ifarch ia64
-       mkdir -p %{buildroot}/boot/efi
-       cp \$($MAKE -s image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE
-       ln -s efi/vmlinuz-$KERNELRELEASE %{buildroot}/boot/
-       %else
-       cp \$($MAKE -s image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE
-       %endif
-$M     $MAKE %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} modules_install
-       $MAKE %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
-       cp System.map %{buildroot}/boot/System.map-$KERNELRELEASE
-       cp .config %{buildroot}/boot/config-$KERNELRELEASE
-$S$M   rm -f %{buildroot}/lib/modules/$KERNELRELEASE/build
-$S$M   rm -f %{buildroot}/lib/modules/$KERNELRELEASE/source
-$S$M   mkdir -p %{buildroot}/usr/src/kernels/$KERNELRELEASE
-$S$M   tar cf - $EXCLUDES . | tar xf - -C %{buildroot}/usr/src/kernels/$KERNELRELEASE
-$S$M   cd %{buildroot}/lib/modules/$KERNELRELEASE
-$S$M   ln -sf /usr/src/kernels/$KERNELRELEASE build
-$S$M   ln -sf /usr/src/kernels/$KERNELRELEASE source
-
-       %clean
-       rm -rf %{buildroot}
-
-       %post
-       if [ -x /sbin/installkernel -a -r /boot/vmlinuz-$KERNELRELEASE -a -r /boot/System.map-$KERNELRELEASE ]; then
-       cp /boot/vmlinuz-$KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm
-       cp /boot/System.map-$KERNELRELEASE /boot/.System.map-$KERNELRELEASE-rpm
-       rm -f /boot/vmlinuz-$KERNELRELEASE /boot/System.map-$KERNELRELEASE
-       /sbin/installkernel $KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm
-       rm -f /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm
-       fi
-
-       %preun
-       if [ -x /sbin/new-kernel-pkg ]; then
-       new-kernel-pkg --remove $KERNELRELEASE --rminitrd --initrdfile=/boot/initramfs-$KERNELRELEASE.img
-       elif [ -x /usr/bin/kernel-install ]; then
-       kernel-install remove $KERNELRELEASE
-       fi
-
-       %postun
-       if [ -x /sbin/update-bootloader ]; then
-       /sbin/update-bootloader --remove $KERNELRELEASE
-       fi
-
-       %files
-       %defattr (-, root, root)
-$M     /lib/modules/$KERNELRELEASE
-$M     %exclude /lib/modules/$KERNELRELEASE/build
-$M     %exclude /lib/modules/$KERNELRELEASE/source
-       /boot/*
-
-       %files headers
-       %defattr (-, root, root)
-       /usr/include
-$S$M
-$S$M   %files devel
-$S$M   %defattr (-, root, root)
-$S$M   /usr/src/kernels/$KERNELRELEASE
-$S$M   /lib/modules/$KERNELRELEASE/build
-$S$M   /lib/modules/$KERNELRELEASE/source
+cat<<EOF
+%define ARCH ${ARCH}
+%define KERNELRELEASE ${KERNELRELEASE}
+%define pkg_release $("${srctree}/init/build-version")
 EOF
+
+cat "${srctree}/scripts/package/kernel.spec"
index f3659ea..8b1a636 100755 (executable)
@@ -37,3 +37,5 @@ rm -f .scmversion
 rm -rf include/ksym
 
 find . -name '*.usyms' | xargs rm -f
+
+rm -f binkernel.spec
index 3d3baba..38b96c6 100755 (executable)
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 #
 # This scripts adds local version information from the version
-# control systems git, mercurial (hg) and subversion (svn).
+# control system git.
 #
 # If something goes wrong, send a mail the kernel build mailinglist
 # (see MAINTAINERS) and CC Nico Schottelius
@@ -57,21 +57,37 @@ scm_version()
                return
        fi
 
-       # If a localversion*' file and the corresponding annotated tag exist,
-       # use it. This is the case in linux-next.
+       # mainline kernel:  6.2.0-rc5  ->  v6.2-rc5
+       # stable kernel:    6.1.7      ->  v6.1.7
+       version_tag=v$(echo "${KERNELVERSION}" | sed -E 's/^([0-9]+\.[0-9]+)\.0(.*)$/\1\2/')
+
+       # If a localversion* file exists, and the corresponding
+       # annotated tag exists and is an ancestor of HEAD, use
+       # it. This is the case in linux-next.
        tag=${file_localversion#-}
-       tag=$(git describe --exact-match --match=$tag $tag 2>/dev/null)
+       desc=
+       if [ -n "${tag}" ]; then
+               desc=$(git describe --match=$tag 2>/dev/null)
+       fi
+
+       # Otherwise, if a localversion* file exists, and the tag
+       # obtained by appending it to the tag derived from
+       # KERNELVERSION exists and is an ancestor of HEAD, use
+       # it. This is e.g. the case in linux-rt.
+       if [ -z "${desc}" ] && [ -n "${file_localversion}" ]; then
+               tag="${version_tag}${file_localversion}"
+               desc=$(git describe --match=$tag 2>/dev/null)
+       fi
 
        # Otherwise, default to the annotated tag derived from KERNELVERSION.
-       #   mainline kernel:  6.2.0-rc5  ->  v6.2-rc5
-       #   stable kernel:    6.1.7      ->  v6.1.7
-       if [ -z "${tag}" ]; then
-               tag=v$(echo "${KERNELVERSION}" | sed -E 's/^([0-9]+\.[0-9]+)\.0(.*)$/\1\2/')
+       if [ -z "${desc}" ]; then
+               tag="${version_tag}"
+               desc=$(git describe --match=$tag 2>/dev/null)
        fi
 
        # If we are at the tagged commit, we ignore it because the version is
        # well-defined.
-       if [ -z "$(git describe --exact-match --match=$tag 2>/dev/null)" ]; then
+       if [ "${tag}" != "${desc}" ]; then
 
                # If only the short version is requested, don't bother
                # running further git commands
@@ -81,8 +97,8 @@ scm_version()
                fi
                # If we are past the tagged commit, we pretty print it.
                # (like 6.1.0-14595-g292a089d78d3)
-               if atag="$(git describe --match=$tag 2>/dev/null)"; then
-                       echo "$atag" | awk -F- '{printf("-%05d", $(NF-1))}'
+               if [ -n "${desc}" ]; then
+                       echo "${desc}" | awk -F- '{printf("-%05d", $(NF-1))}'
                fi
 
                # Add -g and exactly 12 hex chars.
index d43231b..55b1df8 100644 (file)
@@ -67,7 +67,7 @@ struct landlock_rule {
         * @layers: Stack of layers, from the latest to the newest, implemented
         * as a flexible array member (FAM).
         */
-       struct landlock_layer layers[];
+       struct landlock_layer layers[] __counted_by(num_layers);
 };
 
 /**
index 4859fb1..a11cd7d 100644 (file)
@@ -1992,8 +1992,8 @@ static int default_write_copy(struct snd_pcm_substream *substream,
                              int channel, unsigned long hwoff,
                              struct iov_iter *iter, unsigned long bytes)
 {
-       if (!copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff),
-                           bytes, iter))
+       if (copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff),
+                          bytes, iter) != bytes)
                return -EFAULT;
        return 0;
 }
@@ -2025,8 +2025,8 @@ static int default_read_copy(struct snd_pcm_substream *substream,
                             int channel, unsigned long hwoff,
                             struct iov_iter *iter, unsigned long bytes)
 {
-       if (!copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff),
-                         bytes, iter))
+       if (copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff),
+                        bytes, iter) != bytes)
                return -EFAULT;
        return 0;
 }
index 174585b..b603bb9 100644 (file)
@@ -187,8 +187,13 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char
        err = expand_var_event(event, 0, len, buf, in_kernel);
        if (err < 0)
                return err;
-       if (len != newlen)
-               memset(buf + len, 0, newlen - len);
+       if (len != newlen) {
+               if (in_kernel)
+                       memset(buf + len, 0, newlen - len);
+               else if (clear_user((__force void __user *)buf + len,
+                                   newlen - len))
+                       return -EFAULT;
+       }
        return newlen;
 }
 EXPORT_SYMBOL(snd_seq_expand_var_event);
index c05935c..9234d4f 100644 (file)
@@ -456,7 +456,7 @@ static int emu8k_pcm_silence(struct snd_pcm_substream *subs,
        /* convert to word unit */
        pos = (pos << 1) + rec->loop_start[voice];
        count <<= 1;
-       LOOP_WRITE(rec, pos, USER_SOCKPTR(NULL), count);
+       LOOP_WRITE(rec, pos, NULL, count);
        return 0;
 }
 
index 0ba1fbc..6278999 100644 (file)
@@ -888,7 +888,7 @@ static void cs42l42_resume(struct sub_codec *cs42l42)
 
        /* Initialize CS42L42 companion codec */
        cs8409_i2c_bulk_write(cs42l42, cs42l42->init_seq, cs42l42->init_seq_num);
-       usleep_range(30000, 35000);
+       msleep(CS42L42_INIT_TIMEOUT_MS);
 
        /* Clear interrupts, by reading interrupt status registers */
        cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs));
index 2a8dfb4..937e938 100644 (file)
@@ -229,6 +229,7 @@ enum cs8409_coefficient_index_registers {
 #define CS42L42_I2C_SLEEP_US                   (2000)
 #define CS42L42_PDN_TIMEOUT_US                 (250000)
 #define CS42L42_PDN_SLEEP_US                   (2000)
+#define CS42L42_INIT_TIMEOUT_MS                        (45)
 #define CS42L42_FULL_SCALE_VOL_MASK            (2)
 #define CS42L42_FULL_SCALE_VOL_0DB             (1)
 #define CS42L42_FULL_SCALE_VOL_MINUS6DB                (0)
index a07df6f..b7e78bf 100644 (file)
@@ -7057,6 +7057,27 @@ static void alc295_fixup_dell_inspiron_top_speakers(struct hda_codec *codec,
        }
 }
 
+/*
+ * Forcibly assign NID 0x03 to HP and NID 0x02 to SPK.
+ *
+ * Acts only for HDA_FIXUP_ACT_PRE_PROBE; every other action returns
+ * immediately. It overrides the connection list of NID 0x17 with
+ * { 0x02, 0x03 }, installs preferred_pairs as the preferred DAC table
+ * (0x17 -> 0x02, 0x21 -> 0x03), turns on auto_mute_via_amp and writes a
+ * pin widget control value of 0 to NID 0x14.
+ */
+static void alc287_fixup_bind_dacs(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+       static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */
+       /* NID pairs followed by a trailing 0 (presumably the list terminator) */
+       static const hda_nid_t preferred_pairs[] = {
+               0x17, 0x02, 0x21, 0x03, 0
+       };
+
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn);
+       spec->gen.preferred_dacs = preferred_pairs;
+       spec->gen.auto_mute_via_amp = 1;
+       snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+                           0x0); /* Make sure pin 0x14 is disabled */
+}
+
+
 enum {
        ALC269_FIXUP_GPIO2,
        ALC269_FIXUP_SONY_VAIO,
@@ -7319,6 +7340,7 @@ enum {
        ALC287_FIXUP_TAS2781_I2C,
        ALC245_FIXUP_HP_MUTE_LED_COEFBIT,
        ALC245_FIXUP_HP_X360_MUTE_LEDS,
+       ALC287_FIXUP_THINKPAD_I2S_SPK,
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9413,6 +9435,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC245_FIXUP_HP_GPIO_LED
        },
+       [ALC287_FIXUP_THINKPAD_I2S_SPK] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc287_fixup_bind_dacs,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10544,6 +10570,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x17, 0x90170111},
                {0x19, 0x03a11030},
                {0x21, 0x03211020}),
+       SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK,
+               {0x17, 0x90170110},
+               {0x19, 0x03a11030},
+               {0x21, 0x03211020}),
        SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
                {0x12, 0x90a60130},
                {0x17, 0x90170110},
index 37114fd..fb80280 100644 (file)
@@ -173,16 +173,6 @@ static int tasdevice_get_profile_id(struct snd_kcontrol *kcontrol,
        return 0;
 }
 
-static int tasdevice_hda_clamp(int val, int max)
-{
-       if (val > max)
-               val = max;
-
-       if (val < 0)
-               val = 0;
-       return val;
-}
-
 static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
                struct snd_ctl_elem_value *ucontrol)
 {
@@ -191,7 +181,7 @@ static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
        int max = tas_priv->rcabin.ncfgs - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_profile, max);
+       val = clamp(nr_profile, 0, max);
 
        if (tas_priv->rcabin.profile_cfg_id != val) {
                tas_priv->rcabin.profile_cfg_id = val;
@@ -248,7 +238,7 @@ static int tasdevice_program_put(struct snd_kcontrol *kcontrol,
        int max = tas_fw->nr_programs - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_program, max);
+       val = clamp(nr_program, 0, max);
 
        if (tas_priv->cur_prog != val) {
                tas_priv->cur_prog = val;
@@ -277,7 +267,7 @@ static int tasdevice_config_put(struct snd_kcontrol *kcontrol,
        int max = tas_fw->nr_configurations - 1;
        int val, ret = 0;
 
-       val = tasdevice_hda_clamp(nr_config, max);
+       val = clamp(nr_config, 0, max);
 
        if (tas_priv->cur_conf != val) {
                tas_priv->cur_conf = val;
index b304b35..3ec15b4 100644 (file)
@@ -217,6 +217,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                .driver_data = &acp6x_card,
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "82TL"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
                }
        },
@@ -328,6 +335,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
        {
                .driver_data = &acp6x_card,
                .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+                       DMI_MATCH(DMI_BOARD_NAME, "8A3E"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "MECHREVO"),
                        DMI_MATCH(DMI_BOARD_NAME, "MRID6"),
                }
index afe213a..dcc4e14 100644 (file)
@@ -954,7 +954,7 @@ static int mchp_pdmc_dt_init(struct mchp_pdmc *dd)
 /* used to clean the channel index found on RHR's MSB */
 static int mchp_pdmc_process(struct snd_pcm_substream *substream,
                             int channel, unsigned long hwoff,
-                            struct iov_iter *buf, unsigned long bytes)
+                            unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        u8 *dma_ptr = runtime->dma_area + hwoff +
index 95b5bd8..f1e1dbc 100644 (file)
@@ -1968,11 +1968,15 @@ config SND_SOC_UDA1380
        tristate
        depends on I2C
 
+config SND_SOC_WCD_CLASSH
+       tristate
+
 config SND_SOC_WCD9335
        tristate "WCD9335 Codec"
        depends on SLIMBUS
        select REGMAP_SLIMBUS
        select REGMAP_IRQ
+       select SND_SOC_WCD_CLASSH
        help
          The WCD9335 is a standalone Hi-Fi audio CODEC IC, supports
          Qualcomm Technologies, Inc. (QTI) multimedia solutions,
@@ -1987,6 +1991,7 @@ config SND_SOC_WCD934X
        depends on SLIMBUS
        select REGMAP_IRQ
        select REGMAP_SLIMBUS
+       select SND_SOC_WCD_CLASSH
        select SND_SOC_WCD_MBHC
        depends on MFD_WCD934X || COMPILE_TEST
        help
@@ -1997,6 +2002,7 @@ config SND_SOC_WCD938X
        depends on SND_SOC_WCD938X_SDW
        tristate
        depends on SOUNDWIRE || !SOUNDWIRE
+       select SND_SOC_WCD_CLASSH
 
 config SND_SOC_WCD938X_SDW
        tristate "WCD9380/WCD9385 Codec - SDW"
index c8502a4..a87e569 100644 (file)
@@ -303,10 +303,11 @@ snd-soc-twl4030-objs := twl4030.o
 snd-soc-twl6040-objs := twl6040.o
 snd-soc-uda1334-objs := uda1334.o
 snd-soc-uda1380-objs := uda1380.o
+snd-soc-wcd-classh-objs := wcd-clsh-v2.o
 snd-soc-wcd-mbhc-objs := wcd-mbhc-v2.o
-snd-soc-wcd9335-objs := wcd-clsh-v2.o wcd9335.o
-snd-soc-wcd934x-objs := wcd-clsh-v2.o wcd934x.o
-snd-soc-wcd938x-objs := wcd938x.o wcd-clsh-v2.o
+snd-soc-wcd9335-objs := wcd9335.o
+snd-soc-wcd934x-objs := wcd934x.o
+snd-soc-wcd938x-objs := wcd938x.o
 snd-soc-wcd938x-sdw-objs := wcd938x-sdw.o
 snd-soc-wl1273-objs := wl1273.o
 snd-soc-wm-adsp-objs := wm_adsp.o
@@ -685,6 +686,7 @@ obj-$(CONFIG_SND_SOC_TWL4030)       += snd-soc-twl4030.o
 obj-$(CONFIG_SND_SOC_TWL6040)  += snd-soc-twl6040.o
 obj-$(CONFIG_SND_SOC_UDA1334)  += snd-soc-uda1334.o
 obj-$(CONFIG_SND_SOC_UDA1380)  += snd-soc-uda1380.o
+obj-$(CONFIG_SND_SOC_WCD_CLASSH)       += snd-soc-wcd-classh.o
 obj-$(CONFIG_SND_SOC_WCD_MBHC) += snd-soc-wcd-mbhc.o
 obj-$(CONFIG_SND_SOC_WCD9335)  += snd-soc-wcd9335.o
 obj-$(CONFIG_SND_SOC_WCD934X)  += snd-soc-wcd934x.o
index d1edb98..be4f422 100644 (file)
@@ -279,7 +279,7 @@ static const struct snd_kcontrol_new cs35l45_dsp_muxes[] = {
 };
 
 static const struct snd_kcontrol_new cs35l45_dac_muxes[] = {
-       SOC_DAPM_ENUM("DACPCM1 Source", cs35l45_dacpcm_enums[0]),
+       SOC_DAPM_ENUM("DACPCM Source", cs35l45_dacpcm_enums[0]),
 };
 
 static const struct snd_soc_dapm_widget cs35l45_dapm_widgets[] = {
@@ -333,7 +333,7 @@ static const struct snd_soc_dapm_widget cs35l45_dapm_widgets[] = {
        SND_SOC_DAPM_MUX("DSP_RX7 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dsp_muxes[6]),
        SND_SOC_DAPM_MUX("DSP_RX8 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dsp_muxes[7]),
 
-       SND_SOC_DAPM_MUX("DACPCM1 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dac_muxes[0]),
+       SND_SOC_DAPM_MUX("DACPCM Source", SND_SOC_NOPM, 0, 0, &cs35l45_dac_muxes[0]),
 
        SND_SOC_DAPM_OUT_DRV("AMP", SND_SOC_NOPM, 0, 0, NULL, 0),
 
@@ -403,7 +403,7 @@ static const struct snd_soc_dapm_route cs35l45_dapm_routes[] = {
        { "ASP_RX1", NULL, "ASP_EN" },
        { "ASP_RX2", NULL, "ASP_EN" },
 
-       { "AMP", NULL, "DACPCM1 Source"},
+       { "AMP", NULL, "DACPCM Source"},
        { "AMP", NULL, "GLOBAL_EN"},
 
        CS35L45_DSP_MUX_ROUTE("DSP_RX1"),
@@ -427,7 +427,7 @@ static const struct snd_soc_dapm_route cs35l45_dapm_routes[] = {
        {"DSP1 Preload", NULL, "DSP1 Preloader"},
        {"DSP1", NULL, "DSP1 Preloader"},
 
-       CS35L45_DAC_MUX_ROUTE("DACPCM1"),
+       CS35L45_DAC_MUX_ROUTE("DACPCM"),
 
        { "SPK", NULL, "AMP"},
 };
@@ -969,7 +969,7 @@ static irqreturn_t cs35l45_dsp_virt2_mbox_cb(int irq, void *data)
 
        ret = regmap_read(cs35l45->regmap, CS35L45_DSP_VIRT2_MBOX_3, &mbox_val);
        if (!ret && mbox_val)
-               ret = cs35l45_dsp_virt2_mbox3_irq_handle(cs35l45, mbox_val & CS35L45_MBOX3_CMD_MASK,
+               cs35l45_dsp_virt2_mbox3_irq_handle(cs35l45, mbox_val & CS35L45_MBOX3_CMD_MASK,
                                (mbox_val & CS35L45_MBOX3_DATA_MASK) >> CS35L45_MBOX3_DATA_SHIFT);
 
        /* Handle DSP trace log IRQ */
@@ -1078,6 +1078,7 @@ static int cs35l45_initialize(struct cs35l45_private *cs35l45)
 
        switch (dev_id[0]) {
        case 0x35A450:
+       case 0x35A460:
                break;
        default:
                dev_err(cs35l45->dev, "Bad DEVID 0x%x\n", dev_id[0]);
index ae373f3..98b1e63 100644 (file)
@@ -243,26 +243,27 @@ int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base)
 {
        unsigned int reg;
        unsigned int val;
-       int ret;
+       int read_ret, poll_ret;
 
        if (cs35l56_base->rev < CS35L56_REVID_B0)
                reg = CS35L56_DSP1_HALO_STATE_A1;
        else
                reg = CS35L56_DSP1_HALO_STATE;
 
-       ret = regmap_read_poll_timeout(cs35l56_base->regmap, reg,
-                                      val,
-                                      (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
-                                      CS35L56_HALO_STATE_POLL_US,
-                                      CS35L56_HALO_STATE_TIMEOUT_US);
-
-       if ((ret < 0) && (ret != -ETIMEDOUT)) {
-               dev_err(cs35l56_base->dev, "Failed to read HALO_STATE: %d\n", ret);
-               return ret;
-       }
-
-       if ((ret == -ETIMEDOUT) || (val != CS35L56_HALO_STATE_BOOT_DONE)) {
-               dev_err(cs35l56_base->dev, "Firmware boot fail: HALO_STATE=%#x\n", val);
+       /*
+        * This can't be a regmap_read_poll_timeout() because cs35l56 will NAK
+        * I2C until it has booted which would terminate the poll
+        */
+       poll_ret = read_poll_timeout(regmap_read, read_ret,
+                                    (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
+                                    CS35L56_HALO_STATE_POLL_US,
+                                    CS35L56_HALO_STATE_TIMEOUT_US,
+                                    false,
+                                    cs35l56_base->regmap, reg, &val);
+
+       if (poll_ret) {
+               dev_err(cs35l56_base->dev, "Firmware boot timed out(%d): HALO_STATE=%#x\n",
+                       read_ret, val);
                return -EIO;
        }
 
index 24e718e..1a95c37 100644 (file)
@@ -2205,7 +2205,8 @@ static int cs42l43_codec_probe(struct platform_device *pdev)
        // Don't use devm as we need to get against the MFD device
        priv->mclk = clk_get_optional(cs42l43->dev, "mclk");
        if (IS_ERR(priv->mclk)) {
-               dev_err_probe(priv->dev, PTR_ERR(priv->mclk), "Failed to get mclk\n");
+               ret = PTR_ERR(priv->mclk);
+               dev_err_probe(priv->dev, ret, "Failed to get mclk\n");
                goto err_pm;
        }
 
index 038d93e..1a137ca 100644 (file)
@@ -3269,13 +3269,17 @@ static int rt5645_component_set_jack(struct snd_soc_component *component,
 {
        struct snd_soc_jack *mic_jack = NULL;
        struct snd_soc_jack *btn_jack = NULL;
-       int *type = (int *)data;
+       int type;
 
-       if (*type & SND_JACK_MICROPHONE)
-               mic_jack = hs_jack;
-       if (*type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
-               SND_JACK_BTN_2 | SND_JACK_BTN_3))
-               btn_jack = hs_jack;
+       if (hs_jack) {
+               type = *(int *)data;
+
+               if (type & SND_JACK_MICROPHONE)
+                       mic_jack = hs_jack;
+               if (type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+                       SND_JACK_BTN_2 | SND_JACK_BTN_3))
+                       btn_jack = hs_jack;
+       }
 
        return rt5645_set_jack_detect(component, hs_jack, mic_jack, btn_jack);
 }
index a75db27..d96e23e 100644 (file)
@@ -355,6 +355,7 @@ void wcd_clsh_set_hph_mode(struct wcd_clsh_ctrl *ctrl, int mode)
                wcd_clsh_v2_set_hph_mode(comp, mode);
 
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_set_hph_mode);
 
 static void wcd_clsh_set_flyback_current(struct snd_soc_component *comp,
                                         int mode)
@@ -869,11 +870,13 @@ int wcd_clsh_ctrl_set_state(struct wcd_clsh_ctrl *ctrl,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_set_state);
 
 int wcd_clsh_ctrl_get_state(struct wcd_clsh_ctrl *ctrl)
 {
        return ctrl->state;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_get_state);
 
 struct wcd_clsh_ctrl *wcd_clsh_ctrl_alloc(struct snd_soc_component *comp,
                                          int version)
@@ -890,8 +893,13 @@ struct wcd_clsh_ctrl *wcd_clsh_ctrl_alloc(struct snd_soc_component *comp,
 
        return ctrl;
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_alloc);
 
 void wcd_clsh_ctrl_free(struct wcd_clsh_ctrl *ctrl)
 {
        kfree(ctrl);
 }
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_free);
+
+MODULE_DESCRIPTION("WCD93XX Class-H driver");
+MODULE_LICENSE("GPL");
index 1fbb2c2..8565a53 100644 (file)
@@ -796,6 +796,28 @@ static int avs_component_probe(struct snd_soc_component *component)
 
        ret = avs_load_topology(component, filename);
        kfree(filename);
+       if (ret == -ENOENT && !strncmp(mach->tplg_filename, "hda-", 4)) {
+               unsigned int vendor_id;
+
+               if (sscanf(mach->tplg_filename, "hda-%08x-tplg.bin", &vendor_id) != 1)
+                       return ret;
+
+               if (((vendor_id >> 16) & 0xFFFF) == 0x8086)
+                       mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL,
+                                                            "hda-8086-generic-tplg.bin");
+               else
+                       mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL,
+                                                            "hda-generic-tplg.bin");
+
+               filename = kasprintf(GFP_KERNEL, "%s/%s", component->driver->topology_name_prefix,
+                                    mach->tplg_filename);
+               if (!filename)
+                       return -ENOMEM;
+
+               dev_info(card->dev, "trying to load fallback topology %s\n", mach->tplg_filename);
+               ret = avs_load_topology(component, filename);
+               kfree(filename);
+       }
        if (ret < 0)
                return ret;
 
index f18406d..ba7c0ae 100644 (file)
@@ -1054,7 +1054,7 @@ int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream)
 
 int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                               int channel, unsigned long pos,
-                              struct iov_iter *buf, unsigned long bytes)
+                              struct iov_iter *iter, unsigned long bytes)
 {
        struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
        struct snd_soc_component *component;
@@ -1065,7 +1065,7 @@ int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
                if (component->driver->copy)
                        return soc_component_ret(component,
                                component->driver->copy(component, substream,
-                                       channel, pos, buf, bytes));
+                                       channel, pos, iter, bytes));
 
        return -EINVAL;
 }
index ff21665..d0653d7 100644 (file)
@@ -290,29 +290,29 @@ static snd_pcm_uframes_t dmaengine_pcm_pointer(
 static int dmaengine_copy(struct snd_soc_component *component,
                          struct snd_pcm_substream *substream,
                          int channel, unsigned long hwoff,
-                         struct iov_iter *buf, unsigned long bytes)
+                         struct iov_iter *iter, unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct dmaengine_pcm *pcm = soc_component_to_pcm(component);
        int (*process)(struct snd_pcm_substream *substream,
                       int channel, unsigned long hwoff,
-                      struct iov_iter *buf, unsigned long bytes) = pcm->config->process;
+                      unsigned long bytes) = pcm->config->process;
        bool is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
        void *dma_ptr = runtime->dma_area + hwoff +
                        channel * (runtime->dma_bytes / runtime->channels);
 
        if (is_playback)
-               if (copy_from_iter(dma_ptr, bytes, buf) != bytes)
+               if (copy_from_iter(dma_ptr, bytes, iter) != bytes)
                        return -EFAULT;
 
        if (process) {
-               int ret = process(substream, channel, hwoff, buf, bytes);
+               int ret = process(substream, channel, hwoff, bytes);
                if (ret < 0)
                        return ret;
        }
 
        if (!is_playback)
-               if (copy_to_iter(dma_ptr, bytes, buf) != bytes)
+               if (copy_to_iter(dma_ptr, bytes, iter) != bytes)
                        return -EFAULT;
 
        return 0;
index f9b5d59..0acc848 100644 (file)
@@ -1246,7 +1246,7 @@ static const struct snd_soc_dai_ops stm32_sai_pcm_dai_ops2 = {
 
 static int stm32_sai_pcm_process_spdif(struct snd_pcm_substream *substream,
                                       int channel, unsigned long hwoff,
-                                      struct iov_iter *buf, unsigned long bytes)
+                                      unsigned long bytes)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
index a27e244..1ec177f 100644 (file)
@@ -265,7 +265,7 @@ static void free_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 
        if (!ep)
                return;
-       for (i = 0; i < ep->num_urbs; ++i) {
+       for (i = 0; i < NUM_URBS; ++i) {
                ctx = &ep->urbs[i];
                if (!ctx->urb)
                        break;
@@ -279,6 +279,7 @@ static void free_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 }
 
 /* allocate URBs for an EP */
+/* the callers should handle allocation errors via free_midi_urbs() */
 static int alloc_midi_urbs(struct snd_usb_midi2_endpoint *ep)
 {
        struct snd_usb_midi2_urb *ctx;
@@ -351,8 +352,10 @@ static int snd_usb_midi_v2_open(struct snd_ump_endpoint *ump, int dir)
                return -EIO;
        if (ep->direction == STR_OUT) {
                err = alloc_midi_urbs(ep);
-               if (err)
+               if (err) {
+                       free_midi_urbs(ep);
                        return err;
+               }
        }
        return 0;
 }
index 1f6d904..798e60b 100644 (file)
 #define X86_FEATURE_SEV_ES             (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
 #define X86_FEATURE_V_TSC_AUX          (19*32+ 9) /* "" Virtual TSC_AUX */
 #define X86_FEATURE_SME_COHERENT       (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP         (19*32+14) /* AMD SEV-ES full debug state swap support */
 
 /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
 #define X86_FEATURE_NO_NESTED_DATA_BP  (20*32+ 0) /* "" No Nested Data Breakpoints */
index 0b214f6..2e5c231 100644 (file)
@@ -83,7 +83,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 #define perf_event_name(array, id) ({                  \
        const char *event_str = NULL;                   \
                                                        \
-       if ((id) >= 0 && (id) < ARRAY_SIZE(array))      \
+       if ((id) < ARRAY_SIZE(array))                   \
                event_str = array[id];                  \
        event_str;                                      \
 })
index 8943033..fac4248 100644 (file)
@@ -117,6 +117,16 @@ $(OUTPUT)%.s: %.c FORCE
        $(call rule_mkdir)
        $(call if_changed_dep,cc_s_c)
 
+# bison and flex files are generated in the OUTPUT directory
+# so it needs a separate rule to depend on them properly
+$(OUTPUT)%-bison.o: $(OUTPUT)%-bison.c FORCE
+       $(call rule_mkdir)
+       $(call if_changed_dep,$(host)cc_o_c)
+
+$(OUTPUT)%-flex.o: $(OUTPUT)%-flex.c FORCE
+       $(call rule_mkdir)
+       $(call if_changed_dep,$(host)cc_o_c)
+
 # Gather build data:
 #   obj-y        - list of build objects
 #   subdir-y     - list of directories to nest
index f0c5de0..dad79ed 100644 (file)
@@ -340,7 +340,7 @@ $(OUTPUT)test-jvmti-cmlr.bin:
        $(BUILD)
 
 $(OUTPUT)test-llvm.bin:
-       $(BUILDXX) -std=gnu++14                                 \
+       $(BUILDXX) -std=gnu++17                                 \
                -I$(shell $(LLVM_CONFIG) --includedir)          \
                -L$(shell $(LLVM_CONFIG) --libdir)              \
                $(shell $(LLVM_CONFIG) --libs Core BPF)         \
@@ -348,17 +348,15 @@ $(OUTPUT)test-llvm.bin:
                > $(@:.bin=.make.output) 2>&1
 
 $(OUTPUT)test-llvm-version.bin:
-       $(BUILDXX) -std=gnu++14                                 \
+       $(BUILDXX) -std=gnu++17                                 \
                -I$(shell $(LLVM_CONFIG) --includedir)          \
                > $(@:.bin=.make.output) 2>&1
 
 $(OUTPUT)test-clang.bin:
-       $(BUILDXX) -std=gnu++14                                 \
+       $(BUILDXX) -std=gnu++17                                 \
                -I$(shell $(LLVM_CONFIG) --includedir)          \
                -L$(shell $(LLVM_CONFIG) --libdir)              \
-               -Wl,--start-group -lclangBasic -lclangDriver    \
-                 -lclangFrontend -lclangEdit -lclangLex        \
-                 -lclangAST -Wl,--end-group                    \
+               -Wl,--start-group -lclang-cpp -Wl,--end-group   \
                $(shell $(LLVM_CONFIG) --libs Core option)      \
                $(shell $(LLVM_CONFIG) --system-libs)           \
                > $(@:.bin=.make.output) 2>&1
diff --git a/tools/build/feature/test-clang.cpp b/tools/build/feature/test-clang.cpp
deleted file mode 100644 (file)
index 7d87075..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "clang/Basic/Version.h"
-#if CLANG_VERSION_MAJOR < 8
-#include "clang/Basic/VirtualFileSystem.h"
-#endif
-#include "clang/Driver/Driver.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/Support/ManagedStatic.h"
-#if CLANG_VERSION_MAJOR >= 8
-#include "llvm/Support/VirtualFileSystem.h"
-#endif
-#include "llvm/Support/raw_ostream.h"
-
-using namespace clang;
-using namespace clang::driver;
-
-int main()
-{
-       IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-       IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
-
-       DiagnosticsEngine Diags(DiagID, &*DiagOpts);
-       Driver TheDriver("test", "bpf-pc-linux", Diags);
-
-       llvm::llvm_shutdown();
-       return 0;
-}
diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp
deleted file mode 100644 (file)
index 396aaed..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <iostream>
-#include <memory>
-
-static void print_str(std::string s)
-{
-       std::cout << s << std::endl;
-}
-
-int main()
-{
-       std::string s("Hello World!");
-       print_str(std::move(s));
-       std::cout << "|" << s << "|" << std::endl;
-       return 0;
-}
diff --git a/tools/build/feature/test-llvm-version.cpp b/tools/build/feature/test-llvm-version.cpp
deleted file mode 100644 (file)
index 8a09162..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <cstdio>
-#include "llvm/Config/llvm-config.h"
-
-#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH)
-#define pass int main() {printf("%x\n", NUM_VERSION); return 0;}
-
-#if NUM_VERSION >= 0x030900
-pass
-#else
-# error This LLVM is not tested yet.
-#endif
diff --git a/tools/build/feature/test-llvm.cpp b/tools/build/feature/test-llvm.cpp
deleted file mode 100644 (file)
index 88a3d1b..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
-#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH)
-
-#if NUM_VERSION < 0x030900
-# error "LLVM version too low"
-#endif
-int main()
-{
-       llvm::errs() << "Hello World!\n";
-       llvm::llvm_shutdown();
-       return 0;
-}
index ba2dcf6..ae64090 100644 (file)
@@ -148,8 +148,18 @@ struct perf_record_switch {
 struct perf_record_header_attr {
        struct perf_event_header header;
        struct perf_event_attr   attr;
-       __u64                    id[];
-};
+       /*
+        * Array of u64 id follows here but we cannot use a flexible array
+        * because size of attr in the data can be different than current
+        * version.  Please use perf_record_header_attr_id() below.
+        *
+        * __u64                 id[];  // do not use this
+        */
+};
+
+/* Returns the pointer to id array based on the actual attr size. */
+#define perf_record_header_attr_id(evt)                        \
+       ((void *)&(evt)->attr.attr + (evt)->attr.attr.size)
 
 enum {
        PERF_CPU_MAP__CPUS = 0,
index 6c1da51..1c5606c 100644 (file)
@@ -8,8 +8,8 @@ TARGETS=page-types slabinfo page_owner_sort
 LIB_DIR = ../lib/api
 LIBS = $(LIB_DIR)/libapi.a
 
-CFLAGS += -Wall -Wextra -I../lib/
-LDFLAGS += $(LIBS)
+CFLAGS += -Wall -Wextra -I../lib/ -pthread
+LDFLAGS += $(LIBS) -pthread
 
 all: $(TARGETS)
 
index f04f0ea..ca57896 100644 (file)
@@ -67,6 +67,9 @@ SUBSYSTEM
 'internals'::
        Benchmark internal perf functionality.
 
+'uprobe'::
+       Benchmark overhead of uprobe + BPF.
+
 'all'::
        All benchmark subsystems.
 
index 1478068..0b4e79d 100644 (file)
@@ -125,9 +125,6 @@ Given a $HOME/.perfconfig like this:
                group = true
                skip-empty = true
 
-       [llvm]
-               dump-obj = true
-               clang-opt = -g
 
 You can hide source code of annotate feature setting the config to false with
 
@@ -657,36 +654,6 @@ ftrace.*::
                -F option is not specified. Possible values are 'function' and
                'function_graph'.
 
-llvm.*::
-       llvm.clang-path::
-               Path to clang. If omit, search it from $PATH.
-
-       llvm.clang-bpf-cmd-template::
-               Cmdline template. Below lines show its default value. Environment
-               variable is used to pass options.
-               "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
-               "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE "     \
-               "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
-               "-Wno-unused-value -Wno-pointer-sign "          \
-               "-working-directory $WORKING_DIR "              \
-               "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
-
-       llvm.clang-opt::
-               Options passed to clang.
-
-       llvm.kbuild-dir::
-               kbuild directory. If not set, use /lib/modules/`uname -r`/build.
-               If set to "" deliberately, skip kernel header auto-detector.
-
-       llvm.kbuild-opts::
-               Options passed to 'make' when detecting kernel header options.
-
-       llvm.dump-obj::
-               Enable perf dump BPF object files compiled by LLVM.
-
-       llvm.opts::
-               Options passed to llc.
-
 samples.*::
 
        samples.context::
index fb22e3b..8887cc2 100644 (file)
@@ -64,6 +64,12 @@ internal filtering.
 If implemented, 'filter_description' should return a one-line description
 of the filter, and optionally a longer description.
 
+Do not assume the 'sample' argument is valid (dereferenceable)
+after 'filter_event' and 'filter_event_early' return.
+
+Do not assume data referenced by pointers in struct perf_dlfilter_sample
+is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return.
+
 The perf_dlfilter_sample structure
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -150,7 +156,8 @@ struct perf_dlfilter_fns {
        const char *(*srcline)(void *ctx, __u32 *line_number);
        struct perf_event_attr *(*attr)(void *ctx);
        __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
-       void *(*reserved[120])(void *);
+       void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
+       void *(*reserved[119])(void *);
 };
 ----
 
@@ -161,7 +168,8 @@ struct perf_dlfilter_fns {
 'args' returns arguments from --dlarg options.
 
 'resolve_address' provides information about 'address'. al->size must be set
-before calling. Returns 0 on success, -1 otherwise.
+before calling. Returns 0 on success, -1 otherwise. Call al_cleanup() (if present,
+see below) when 'al' data is no longer needed.
 
 'insn' returns instruction bytes and length.
 
@@ -171,6 +179,12 @@ before calling. Returns 0 on success, -1 otherwise.
 
 'object_code' reads object code and returns the number of bytes read.
 
+'al_cleanup' must be called (if present, so check perf_dlfilter_fns.al_cleanup != NULL)
+after resolve_address() to free any associated resources.
+
+Do not assume pointers obtained via perf_dlfilter_fns are valid (dereferenceable)
+after 'filter_event' and 'filter_event_early' return.
+
 The perf_dlfilter_al structure
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -197,9 +211,13 @@ struct perf_dlfilter_al {
        /* Below members are only populated by resolve_ip() */
        __u8 filtered; /* true if this sample event will be filtered out */
        const char *comm;
+       void *priv; /* Private data. Do not change */
 };
 ----
 
+Do not assume data referenced by pointers in struct perf_dlfilter_al
+is valid (dereferenceable) after 'filter_event' and 'filter_event_early' return.
+
 perf_dlfilter_sample flags
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
index df45955..d780b93 100644 (file)
@@ -96,8 +96,9 @@ OPTIONS for 'perf ftrace trace'
 
 --func-opts::
        List of options allowed to set:
-         call-graph - Display kernel stack trace for function tracer.
-         irq-info   - Display irq context info for function tracer.
+
+         - call-graph - Display kernel stack trace for function tracer.
+         - irq-info   - Display irq context info for function tracer.
 
 -G::
 --graph-funcs=::
@@ -118,11 +119,12 @@ OPTIONS for 'perf ftrace trace'
 
 --graph-opts::
        List of options allowed to set:
-         nosleep-time - Measure on-CPU time only for function_graph tracer.
-         noirqs       - Ignore functions that happen inside interrupt.
-         verbose      - Show process names, PIDs, timestamps, etc.
-         thresh=<n>   - Setup trace duration threshold in microseconds.
-         depth=<n>    - Set max depth for function graph tracer to follow.
+
+         - nosleep-time - Measure on-CPU time only for function_graph tracer.
+         - noirqs       - Ignore functions that happen inside interrupt.
+         - verbose      - Show process names, PIDs, timestamps, etc.
+         - thresh=<n>   - Setup trace duration threshold in microseconds.
+         - depth=<n>    - Set max depth for function graph tracer to follow.
 
 
 OPTIONS for 'perf ftrace latency'
index 680396c..d5217be 100644 (file)
@@ -99,20 +99,6 @@ OPTIONS
           If you want to profile write accesses in [0x1000~1008), just set
           'mem:0x1000/8:w'.
 
-        - a BPF source file (ending in .c) or a precompiled object file (ending
-          in .o) selects one or more BPF events.
-          The BPF program can attach to various perf events based on the ELF section
-          names.
-
-          When processing a '.c' file, perf searches an installed LLVM to compile it
-          into an object file first. Optional clang options can be passed via the
-          '--clang-opt' command line option, e.g.:
-
-            perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \
-                        -e tests/bpf-script-example.c
-
-          Note: '--clang-opt' must be placed before '--event/-e'.
-
        - a group of events surrounded by a pair of brace ("{event1,event2,...}").
          Each event is separated by commas and the group should be quoted to
          prevent the shell interpretation.  You also need to use --group on
@@ -523,9 +509,10 @@ CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
 Select AUX area tracing Snapshot Mode. This option is valid only with an
 AUX area tracing event. Optionally, certain snapshot capturing parameters
 can be specified in a string that follows this option:
-  'e': take one last snapshot on exit; guarantees that there is at least one
+
+  - 'e': take one last snapshot on exit; guarantees that there is at least one
        snapshot in the output file;
-  <size>: if the PMU supports this, specify the desired snapshot size.
+  <size>: if the PMU supports this, specify the desired snapshot size.
 
 In Snapshot Mode trace data is captured only when signal SIGUSR2 is received
 and on exit if the above 'e' option is given.
@@ -547,14 +534,6 @@ PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE)
 switch events will be enabled automatically, which can be suppressed by
 the option --no-switch-events.
 
---clang-path=PATH::
-Path to clang binary to use for compiling BPF scriptlets.
-(enabled when BPF support is on)
-
---clang-opt=OPTIONS::
-Options passed to clang when compiling BPF scriptlets.
-(enabled when BPF support is on)
-
 --vmlinux=PATH::
 Specify vmlinux path which has debuginfo.
 (enabled when BPF prologue is on)
@@ -572,8 +551,9 @@ providing implementation for Posix AIO API.
 
 --affinity=mode::
 Set affinity mask of trace reading thread according to the policy defined by 'mode' value:
-  node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
-  cpu  - thread affinity mask is set to cpu of the processed mmap buffer
+
+  - node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
+  - cpu  - thread affinity mask is set to cpu of the processed mmap buffer
 
 --mmap-flush=number::
 
@@ -625,16 +605,17 @@ Record timestamp boundary (time of first/last samples).
 --switch-output[=mode]::
 Generate multiple perf.data files, timestamp prefixed, switching to a new one
 based on 'mode' value:
-  "signal" - when receiving a SIGUSR2 (default value) or
-  <size>   - when reaching the size threshold, size is expected to
-             be a number with appended unit character - B/K/M/G
-  <time>   - when reaching the time threshold, size is expected to
-             be a number with appended unit character - s/m/h/d
 
-             Note: the precision of  the size  threshold  hugely depends
-             on your configuration  - the number and size of  your  ring
-             buffers (-m). It is generally more precise for higher sizes
-             (like >5M), for lower values expect different sizes.
+  - "signal" - when receiving a SIGUSR2 (default value) or
+  - <size>   - when reaching the size threshold, size is expected to
+               be a number with appended unit character - B/K/M/G
+  - <time>   - when reaching the time threshold, time is expected to
+               be a number with appended unit character - s/m/h/d
+
+               Note: the precision of  the size  threshold  hugely depends
+               on your configuration  - the number and size of  your  ring
+               buffers (-m). It is generally more precise for higher sizes
+               (like >5M), for lower values expect different sizes.
 
 A possible use case is to, given an external event, slice the perf.data file
 that gets then processed, possibly via a perf script, to decide if that
@@ -680,11 +661,12 @@ choice in this option.  For example, --synth=no would have MMAP events for
 kernel and modules.
 
 Available types are:
-  'task'    - synthesize FORK and COMM events for each task
-  'mmap'    - synthesize MMAP events for each process (implies 'task')
-  'cgroup'  - synthesize CGROUP events for each cgroup
-  'all'     - synthesize all events (default)
-  'no'      - do not synthesize any of the above events
+
+  - 'task'    - synthesize FORK and COMM events for each task
+  - 'mmap'    - synthesize MMAP events for each process (implies 'task')
+  - 'cgroup'  - synthesize CGROUP events for each cgroup
+  - 'all'     - synthesize all events (default)
+  - 'no'      - do not synthesize any of the above events
 
 --tail-synthesize::
 Instead of collecting non-sample events (for example, fork, comm, mmap) at
@@ -736,18 +718,19 @@ ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
 Listen on ctl-fd descriptor for command to control measurement.
 
 Available commands:
-  'enable'           : enable events
-  'disable'          : disable events
-  'enable name'      : enable event 'name'
-  'disable name'     : disable event 'name'
-  'snapshot'         : AUX area tracing snapshot).
-  'stop'             : stop perf record
-  'ping'             : ping
-
-  'evlist [-v|-g|-F] : display all events
-                       -F  Show just the sample frequency used for each event.
-                       -v  Show all fields.
-                       -g  Show event group information.
+
+  - 'enable'           : enable events
+  - 'disable'          : disable events
+  - 'enable name'      : enable event 'name'
+  - 'disable name'     : disable event 'name'
+  - 'snapshot'         : AUX area tracing snapshot
+  - 'stop'             : stop perf record
+  - 'ping'             : ping
+  - 'evlist [-v|-g|-F]': display all events
+
+                         -F  Show just the sample frequency used for each event.
+                         -v  Show all fields.
+                         -g  Show event group information.
 
 Measurements can be started with events disabled using --delay=-1 option. Optionally
 send control command completion ('ack\n') to ack-fd descriptor to synchronize with the
@@ -808,10 +791,10 @@ the second monitors CPUs 1 and 5-7 with the affinity mask 5-7.
 <spec> value can also be a string meaning predefined parallel threads
 layout:
 
-    cpu    - create new data streaming thread for every monitored cpu
-    core   - create new thread to monitor CPUs grouped by a core
-    package - create new thread to monitor CPUs grouped by a package
-    numa   - create new threed to monitor CPUs grouped by a NUMA domain
+    cpu    - create new data streaming thread for every monitored cpu
+    core   - create new thread to monitor CPUs grouped by a core
+    package - create new thread to monitor CPUs grouped by a package
+    numa   - create new thread to monitor CPUs grouped by a NUMA domain
 
 Predefined layouts can be used on systems with large number of CPUs in
 order not to spawn multiple per-cpu streaming threads but still avoid LOST
index 635ba04..010a4ed 100644 (file)
@@ -43,7 +43,7 @@ struct perf_file_section {
 
 Flags section:
 
-For each of the optional features a perf_file_section it placed after the data
+For each of the optional features a perf_file_section is placed after the data
 section if the feature bit is set in the perf_header flags bitset. The
 respective perf_file_section points to the data of the additional header and
 defines its size.
index c5db0de..d66b524 100644 (file)
@@ -246,6 +246,9 @@ ifeq ($(CC_NO_CLANG), 0)
 else
   CORE_CFLAGS += -O6
 endif
+else
+  CORE_CFLAGS += -g
+  CXXFLAGS += -g
 endif
 
 ifdef PARSER_DEBUG
@@ -256,6 +259,11 @@ ifdef PARSER_DEBUG
   $(call detected_var,PARSER_DEBUG_FLEX)
 endif
 
+ifdef LTO
+  CORE_CFLAGS += -flto
+  CXXFLAGS += -flto
+endif
+
 # Try different combinations to accommodate systems that only have
 # python[2][3]-config in weird combinations in the following order of
 # priority from lowest to highest:
@@ -319,18 +327,14 @@ FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
 FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
 
 CORE_CFLAGS += -fno-omit-frame-pointer
-CORE_CFLAGS += -ggdb3
-CORE_CFLAGS += -funwind-tables
 CORE_CFLAGS += -Wall
 CORE_CFLAGS += -Wextra
 CORE_CFLAGS += -std=gnu11
 
-CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti
+CXXFLAGS += -std=gnu++17 -fno-exceptions -fno-rtti
 CXXFLAGS += -Wall
+CXXFLAGS += -Wextra
 CXXFLAGS += -fno-omit-frame-pointer
-CXXFLAGS += -ggdb3
-CXXFLAGS += -funwind-tables
-CXXFLAGS += -Wno-strict-aliasing
 
 HOSTCFLAGS += -Wall
 HOSTCFLAGS += -Wextra
@@ -585,18 +589,6 @@ ifndef NO_LIBELF
        LIBBPF_STATIC := 1
       endif
     endif
-
-    ifndef NO_DWARF
-      ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
-        CFLAGS += -DHAVE_BPF_PROLOGUE
-        $(call detected,CONFIG_BPF_PROLOGUE)
-      else
-        msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
-      endif
-    else
-      msg := $(warning DWARF support is off, BPF prologue is disabled);
-    endif
-
   endif # NO_LIBBPF
 endif # NO_LIBELF
 
@@ -1123,37 +1115,6 @@ ifndef NO_JVMTI
   endif
 endif
 
-USE_CXX = 0
-USE_CLANGLLVM = 0
-ifdef LIBCLANGLLVM
-  $(call feature_check,cxx)
-  ifneq ($(feature-cxx), 1)
-    msg := $(warning No g++ found, disable clang and llvm support. Please install g++)
-  else
-    $(call feature_check,llvm)
-    $(call feature_check,llvm-version)
-    ifneq ($(feature-llvm), 1)
-      msg := $(warning No suitable libLLVM found, disabling builtin clang and LLVM support. Please install llvm-dev(el) (>= 3.9.0))
-    else
-      $(call feature_check,clang)
-      ifneq ($(feature-clang), 1)
-        msg := $(warning No suitable libclang found, disabling builtin clang and LLVM support. Please install libclang-dev(el) (>= 3.9.0))
-      else
-        CFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT
-        CXXFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT -I$(shell $(LLVM_CONFIG) --includedir)
-        $(call detected,CONFIG_CXX)
-        $(call detected,CONFIG_CLANGLLVM)
-       USE_CXX = 1
-       USE_LLVM = 1
-       USE_CLANG = 1
-        ifneq ($(feature-llvm-version),1)
-          msg := $(warning This version of LLVM is not tested. May cause build errors)
-        endif
-      endif
-    endif
-  endif
-endif
-
 ifndef NO_LIBPFM4
   $(call feature_check,libpfm4)
   ifeq ($(feature-libpfm4), 1)
index 097316e..37af6df 100644 (file)
@@ -99,10 +99,6 @@ include ../scripts/utilities.mak
 # Define NO_JVMTI_CMLR (debug only) if you do not want to process CMLR
 # data for java source lines.
 #
-# Define LIBCLANGLLVM if you DO want builtin clang and llvm support.
-# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
-# llvm-config is not in $PATH.
-#
 # Define CORESIGHT if you DO WANT support for CoreSight trace decoding.
 #
 # Define NO_AIO if you do not want support of Posix AIO based trace
@@ -381,7 +377,7 @@ ifndef NO_JVMTI
 PROGRAMS += $(OUTPUT)$(LIBJVMTI)
 endif
 
-DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so
+DLFILTERS := dlfilter-test-api-v0.so dlfilter-test-api-v2.so dlfilter-show-cycles.so
 DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS))
 
 # what 'all' will build and 'install' will install, in perfexecdir
@@ -425,22 +421,6 @@ endif
 EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS))
 LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
 
-ifeq ($(USE_CLANG), 1)
-  CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization
-  CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l))
-  LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so))
-  LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group
-endif
-
-ifeq ($(USE_LLVM), 1)
-  LIBLLVM = $(shell $(LLVM_CONFIG) --libs all) $(shell $(LLVM_CONFIG) --system-libs)
-  LIBS += -L$(shell $(LLVM_CONFIG) --libdir) $(LIBLLVM)
-endif
-
-ifeq ($(USE_CXX), 1)
-  LIBS += -lstdc++
-endif
-
 export INSTALL SHELL_PATH
 
 ### Build rules
@@ -978,11 +958,6 @@ ifndef NO_JVMTI
 endif
        $(call QUIET_INSTALL, libexec) \
                $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-ifndef NO_LIBBPF
-       $(call QUIET_INSTALL, bpf-examples) \
-               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
-               $(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
-endif
        $(call QUIET_INSTALL, perf-archive) \
                $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
        $(call QUIET_INSTALL, perf-iostat) \
@@ -1057,6 +1032,8 @@ SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
 SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
 SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h
+SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h
+SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h
 
 $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
        $(Q)$(MKDIR) -p $@
@@ -1079,10 +1056,15 @@ ifneq ($(CROSS_COMPILE),)
 CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
 endif
 
+CLANG_OPTIONS = -Wall
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
 BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES)
 TOOLS_UAPI_INCLUDE := -I$(srctree)/tools/include/uapi
 
+ifneq ($(WERROR),0)
+  CLANG_OPTIONS += -Werror
+endif
+
 $(BPFTOOL): | $(SKEL_TMP_OUT)
        $(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \
                OUTPUT=$(SKEL_TMP_OUT)/ bootstrap
@@ -1124,7 +1106,7 @@ else
 endif
 
 $(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) $(SKEL_OUT)/vmlinux.h | $(SKEL_TMP_OUT)
-       $(QUIET_CLANG)$(CLANG) -g -O2 --target=bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \
+       $(QUIET_CLANG)$(CLANG) -g -O2 --target=bpf $(CLANG_OPTIONS) $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \
          -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@
 
 $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL)
index 99a0655..75ce1c3 100644 (file)
@@ -12,7 +12,4 @@ void perf_regs_load(u64 *regs);
 #define PERF_REGS_MAX  PERF_REG_ARM_MAX
 #define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_32
 
-#define PERF_REG_IP    PERF_REG_ARM_PC
-#define PERF_REG_SP    PERF_REG_ARM_SP
-
 #endif /* ARCH_PERF_REGS_H */
index 7c51fa1..b8d6a95 100644 (file)
@@ -79,9 +79,9 @@ static int cs_etm_validate_context_id(struct auxtrace_record *itr,
        int err;
        u32 val;
        u64 contextid = evsel->core.attr.config &
-               (perf_pmu__format_bits(&cs_etm_pmu->format, "contextid") |
-                perf_pmu__format_bits(&cs_etm_pmu->format, "contextid1") |
-                perf_pmu__format_bits(&cs_etm_pmu->format, "contextid2"));
+               (perf_pmu__format_bits(cs_etm_pmu, "contextid") |
+                perf_pmu__format_bits(cs_etm_pmu, "contextid1") |
+                perf_pmu__format_bits(cs_etm_pmu, "contextid2"));
 
        if (!contextid)
                return 0;
@@ -106,7 +106,7 @@ static int cs_etm_validate_context_id(struct auxtrace_record *itr,
        }
 
        if (contextid &
-           perf_pmu__format_bits(&cs_etm_pmu->format, "contextid1")) {
+           perf_pmu__format_bits(cs_etm_pmu, "contextid1")) {
                /*
                 * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID
                 * tracing is supported:
@@ -122,7 +122,7 @@ static int cs_etm_validate_context_id(struct auxtrace_record *itr,
        }
 
        if (contextid &
-           perf_pmu__format_bits(&cs_etm_pmu->format, "contextid2")) {
+           perf_pmu__format_bits(cs_etm_pmu, "contextid2")) {
                /*
                 * TRCIDR2.VMIDOPT[30:29] != 0 and
                 * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid)
@@ -151,7 +151,7 @@ static int cs_etm_validate_timestamp(struct auxtrace_record *itr,
        u32 val;
 
        if (!(evsel->core.attr.config &
-             perf_pmu__format_bits(&cs_etm_pmu->format, "timestamp")))
+             perf_pmu__format_bits(cs_etm_pmu, "timestamp")))
                return 0;
 
        if (!cs_etm_is_etmv4(itr, cpu)) {
index 2833e10..2c56e8b 100644 (file)
@@ -1,6 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
+#include "perf_regs.h"
 #include "../../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
+
+uint64_t arch__intr_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
+
+uint64_t arch__user_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
index 1834a0c..4e02cef 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <elfutils/libdwfl.h>
+#include "perf_regs.h"
 #include "../../../util/unwind-libdw.h"
 #include "../../../util/perf_regs.h"
 #include "../../../util/sample.h"
index 452b3d9..474d7cf 100644 (file)
@@ -2,6 +2,9 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
+struct test_suite;
+
+int test__cpuid_match(struct test_suite *test, int subtest);
 extern struct test_suite *arch_tests[];
 
 #endif
index 35a3cc7..58639ee 100644 (file)
@@ -14,7 +14,4 @@ void perf_regs_load(u64 *regs);
 #define PERF_REGS_MAX  PERF_REG_ARM64_MAX
 #define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
 
-#define PERF_REG_IP    PERF_REG_ARM64_PC
-#define PERF_REG_SP    PERF_REG_ARM64_SP
-
 #endif /* ARCH_PERF_REGS_H */
index a61c06b..e337c09 100644 (file)
@@ -2,3 +2,4 @@ perf-y += regs_load.o
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 
 perf-y += arch-tests.o
+perf-y += cpuid-match.o
index ad16b4f..74932e7 100644 (file)
@@ -3,9 +3,13 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
+
+DEFINE_SUITE("arm64 CPUID matching", cpuid_match);
+
 struct test_suite *arch_tests[] = {
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
        &suite__dwarf_unwind,
 #endif
+       &suite__cpuid_match,
        NULL,
 };
diff --git a/tools/perf/arch/arm64/tests/cpuid-match.c b/tools/perf/arch/arm64/tests/cpuid-match.c
new file mode 100644 (file)
index 0000000..e8e3947
--- /dev/null
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+
+#include "arch-tests.h"
+#include "tests/tests.h"
+#include "util/header.h"
+
+int test__cpuid_match(struct test_suite *test __maybe_unused,
+                            int subtest __maybe_unused)
+{
+       /* midr with no leading zeros matches */
+       if (strcmp_cpuid_str("0x410fd0c0", "0x00000000410fd0c0"))
+               return -1;
+       /* Upper case matches */
+       if (strcmp_cpuid_str("0x410fd0c0", "0x00000000410FD0C0"))
+               return -1;
+       /* r0p0 = r0p0 matches */
+       if (strcmp_cpuid_str("0x00000000410fd480", "0x00000000410fd480"))
+               return -1;
+       /* r0p1 > r0p0 matches */
+       if (strcmp_cpuid_str("0x00000000410fd480", "0x00000000410fd481"))
+               return -1;
+       /* r1p0 > r0p0 matches */
+       if (strcmp_cpuid_str("0x00000000410fd480", "0x00000000411fd480"))
+               return -1;
+       /* r0p0 < r0p1 doesn't match */
+       if (!strcmp_cpuid_str("0x00000000410fd481", "0x00000000410fd480"))
+               return -1;
+       /* r0p0 < r1p0 doesn't match */
+       if (!strcmp_cpuid_str("0x00000000411fd480", "0x00000000410fd480"))
+               return -1;
+       /* Different CPU doesn't match */
+       if (!strcmp_cpuid_str("0x00000000410fd4c0", "0x00000000430f0af0"))
+               return -1;
+
+       return 0;
+}
index 3b1676f..9cc3d6d 100644 (file)
@@ -230,7 +230,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
         * inform that the resulting output's SPE samples contain physical addresses
         * where applicable.
         */
-       bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable");
+       bit = perf_pmu__format_bits(arm_spe_pmu, "pa_enable");
        if (arm_spe_evsel->core.attr.config & bit)
                evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
 
index 80b9f62..a2eef9e 100644 (file)
@@ -1,3 +1,6 @@
+#include <linux/kernel.h>
+#include <linux/bits.h>
+#include <linux/bitfield.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <perf/cpumap.h>
 
 #define MIDR "/regs/identification/midr_el1"
 #define MIDR_SIZE 19
-#define MIDR_REVISION_MASK      0xf
-#define MIDR_VARIANT_SHIFT      20
-#define MIDR_VARIANT_MASK       (0xf << MIDR_VARIANT_SHIFT)
+#define MIDR_REVISION_MASK      GENMASK(3, 0)
+#define MIDR_VARIANT_MASK      GENMASK(23, 20)
 
 static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
 {
        const char *sysfs = sysfs__mountpoint();
-       u64 midr = 0;
        int cpu;
+       int ret = EINVAL;
 
        if (!sysfs || sz < MIDR_SIZE)
                return EINVAL;
@@ -44,22 +46,13 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus)
                }
                fclose(file);
 
-               /* Ignore/clear Variant[23:20] and
-                * Revision[3:0] of MIDR
-                */
-               midr = strtoul(buf, NULL, 16);
-               midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK));
-               scnprintf(buf, MIDR_SIZE, "0x%016lx", midr);
                /* got midr break loop */
+               ret = 0;
                break;
        }
 
        perf_cpu_map__put(cpus);
-
-       if (!midr)
-               return EINVAL;
-
-       return 0;
+       return ret;
 }
 
 int get_cpuid(char *buf, size_t sz)
@@ -99,3 +92,47 @@ char *get_cpuid_str(struct perf_pmu *pmu)
 
        return buf;
 }
+
+/*
+ * Return 0 if idstr is an equal or higher version of the same part as
+ * mapcpuid. Therefore, if mapcpuid has 0 for revision and variant then any
+ * version of idstr will match as long as it's the same CPU type.
+ *
+ * Return 1 if the CPU type is different or the version of idstr is lower.
+ */
+int strcmp_cpuid_str(const char *mapcpuid, const char *idstr)
+{
+       u64 map_id = strtoull(mapcpuid, NULL, 16);
+       char map_id_variant = FIELD_GET(MIDR_VARIANT_MASK, map_id);
+       char map_id_revision = FIELD_GET(MIDR_REVISION_MASK, map_id);
+       u64 id = strtoull(idstr, NULL, 16);
+       char id_variant = FIELD_GET(MIDR_VARIANT_MASK, id);
+       char id_revision = FIELD_GET(MIDR_REVISION_MASK, id);
+       u64 id_fields = ~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK);
+
+       /* Compare without version first */
+       if ((map_id & id_fields) != (id & id_fields))
+               return 1;
+
+       /*
+        * ID matches, now compare version.
+        *
+        * Arm revisions (like r0p0) are compared here like two digit semver
+        * values eg. 1.3 < 2.0 < 2.1 < 2.2.
+        *
+        *  r = high value = 'Variant' field in MIDR
+        *  p = low value  = 'Revision' field in MIDR
+        *
+        */
+       if (id_variant > map_id_variant)
+               return 0;
+
+       if (id_variant == map_id_variant && id_revision >= map_id_revision)
+               return 0;
+
+       /*
+        * variant is less than mapfile variant or variants are the same but
+        * the revision doesn't match. Return no match.
+        */
+       return 1;
+}
index 235a0a1..ba11443 100644 (file)
@@ -6,6 +6,7 @@
 #include "debug.h"
 #include "symbol.h"
 #include "callchain.h"
+#include "perf_regs.h"
 #include "record.h"
 #include "util/perf_regs.h"
 
index df817d1..3bcc5c7 100644 (file)
@@ -20,7 +20,7 @@ struct perf_mem_event *perf_mem_events__ptr(int i)
        return &perf_mem_events[i];
 }
 
-char *perf_mem_events__name(int i, char *pmu_name __maybe_unused)
+const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
 {
        struct perf_mem_event *e = perf_mem_events__ptr(i);
 
index 006692c..1b79d8e 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 
+#include "perf_regs.h"
 #include "../../../perf-sys.h"
 #include "../../../util/debug.h"
 #include "../../../util/event.h"
@@ -139,6 +140,11 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
        return SDT_ARG_VALID;
 }
 
+uint64_t arch__intr_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
+
 uint64_t arch__user_reg_mask(void)
 {
        struct perf_event_attr attr = {
index 512a8f1..615084e 100644 (file)
@@ -2,28 +2,12 @@
 
 #include <internal/cpumap.h>
 #include "../../../util/cpumap.h"
+#include "../../../util/header.h"
 #include "../../../util/pmu.h"
 #include "../../../util/pmus.h"
 #include <api/fs/fs.h>
 #include <math.h>
 
-static struct perf_pmu *pmu__find_core_pmu(void)
-{
-       struct perf_pmu *pmu = NULL;
-
-       while ((pmu = perf_pmus__scan_core(pmu))) {
-               /*
-                * The cpumap should cover all CPUs. Otherwise, some CPUs may
-                * not support some events or have different event IDs.
-                */
-               if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu)
-                       return NULL;
-
-               return pmu;
-       }
-       return NULL;
-}
-
 const struct pmu_metrics_table *pmu_metrics_table__find(void)
 {
        struct perf_pmu *pmu = pmu__find_core_pmu();
index 0938508..e056d50 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <elfutils/libdwfl.h>
+#include "perf_regs.h"
 #include "../../../util/unwind-libdw.h"
 #include "../../../util/perf_regs.h"
 #include "../../../util/sample.h"
index 1afcc0e..076c774 100644 (file)
@@ -12,7 +12,4 @@
 #define PERF_REGS_MAX  PERF_REG_CSKY_MAX
 #define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_32
 
-#define PERF_REG_IP    PERF_REG_CSKY_PC
-#define PERF_REG_SP    PERF_REG_CSKY_SP
-
 #endif /* ARCH_PERF_REGS_H */
index 2864e2e..c0877c2 100644 (file)
@@ -1,6 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
+#include "perf_regs.h"
 #include "../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
+
+uint64_t arch__intr_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
+
+uint64_t arch__user_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
index 4bb4a06..79df437 100644 (file)
@@ -2,6 +2,7 @@
 // Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
 
 #include <elfutils/libdwfl.h>
+#include "perf_regs.h"
 #include "../../util/unwind-libdw.h"
 #include "../../util/perf_regs.h"
 #include "../../util/event.h"
index 7833c7d..45c799f 100644 (file)
@@ -7,8 +7,6 @@
 #include <asm/perf_regs.h>
 
 #define PERF_REGS_MAX PERF_REG_LOONGARCH_MAX
-#define PERF_REG_IP PERF_REG_LOONGARCH_PC
-#define PERF_REG_SP PERF_REG_LOONGARCH_R3
 
 #define PERF_REGS_MASK ((1ULL << PERF_REG_LOONGARCH_MAX) - 1)
 
index 2833e10..2c56e8b 100644 (file)
@@ -1,6 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
+#include "perf_regs.h"
 #include "../../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
+
+uint64_t arch__intr_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
+
+uint64_t arch__user_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
index a941538..7b3b9a4 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2020-2023 Loongson Technology Corporation Limited */
 
 #include <elfutils/libdwfl.h>
+#include "perf_regs.h"
 #include "../../util/unwind-libdw.h"
 #include "../../util/perf_regs.h"
 #include "../../util/sample.h"
index b8cd8bb..7082e91 100644 (file)
@@ -7,8 +7,6 @@
 #include <asm/perf_regs.h>
 
 #define PERF_REGS_MAX PERF_REG_MIPS_MAX
-#define PERF_REG_IP PERF_REG_MIPS_PC
-#define PERF_REG_SP PERF_REG_MIPS_R29
 
 #define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1)
 
index 2864e2e..c0877c2 100644 (file)
@@ -1,6 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
+#include "perf_regs.h"
 #include "../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
+
+uint64_t arch__intr_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
+
+uint64_t arch__user_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
index 9bb17c3..1c66f6b 100644 (file)
@@ -16,7 +16,4 @@ void perf_regs_load(u64 *regs);
        #define PERF_SAMPLE_REGS_ABI    PERF_SAMPLE_REGS_ABI_32
 #endif
 
-#define PERF_REG_IP     PERF_REG_POWERPC_NIP
-#define PERF_REG_SP     PERF_REG_POWERPC_R1
-
 #endif /* ARCH_PERF_REGS_H */
index 4120faf..78b986e 100644 (file)
@@ -3,10 +3,10 @@
 #include "mem-events.h"
 
 /* PowerPC does not support 'ldlat' parameter. */
-char *perf_mem_events__name(int i, char *pmu_name __maybe_unused)
+const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
 {
        if (i == PERF_MEM_EVENTS__LOAD)
-               return (char *) "cpu/mem-loads/";
+               return "cpu/mem-loads/";
 
-       return (char *) "cpu/mem-stores/";
+       return "cpu/mem-stores/";
 }
index 8d07a78..b38aa05 100644 (file)
@@ -4,6 +4,7 @@
 #include <regex.h>
 #include <linux/zalloc.h>
 
+#include "perf_regs.h"
 #include "../../../util/perf_regs.h"
 #include "../../../util/debug.h"
 #include "../../../util/event.h"
@@ -226,3 +227,8 @@ uint64_t arch__intr_reg_mask(void)
        }
        return mask;
 }
+
+uint64_t arch__user_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
index e616642..e9a5a8b 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <elfutils/libdwfl.h>
 #include <linux/kernel.h>
+#include "perf_regs.h"
 #include "../../../util/unwind-libdw.h"
 #include "../../../util/perf_regs.h"
 #include "../../../util/sample.h"
index 6944bf0..d482edb 100644 (file)
@@ -16,7 +16,4 @@
 #define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_32
 #endif
 
-#define PERF_REG_IP    PERF_REG_RISCV_PC
-#define PERF_REG_SP    PERF_REG_RISCV_SP
-
 #endif /* ARCH_PERF_REGS_H */
index 2864e2e..c0877c2 100644 (file)
@@ -1,6 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
+#include "perf_regs.h"
 #include "../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
+
+uint64_t arch__intr_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
+
+uint64_t arch__user_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
index 54a1987..5c98010 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. */
 
 #include <elfutils/libdwfl.h>
+#include "perf_regs.h"
 #include "../../util/unwind-libdw.h"
 #include "../../util/perf_regs.h"
 #include "../../util/sample.h"
index 52fcc08..130dfad 100644 (file)
@@ -11,7 +11,4 @@ void perf_regs_load(u64 *regs);
 #define PERF_REGS_MAX PERF_REG_S390_MAX
 #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
 
-#define PERF_REG_IP PERF_REG_S390_PC
-#define PERF_REG_SP PERF_REG_S390_R15
-
 #endif /* ARCH_PERF_REGS_H */
index 2864e2e..c0877c2 100644 (file)
@@ -1,6 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
+#include "perf_regs.h"
 #include "../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
+
+uint64_t arch__intr_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
+
+uint64_t arch__user_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
index 7d92452..f50fb6d 100644 (file)
@@ -5,6 +5,7 @@
 #include "../../util/event.h"
 #include "../../util/sample.h"
 #include "dwarf-regs-table.h"
+#include "perf_regs.h"
 
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
index fa526a9..59d7914 100755 (executable)
@@ -24,7 +24,7 @@ sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
 grep '^[0-9]' "$in" | sort -n > $sorted_table
 
 max_nr=0
-while read nr abi name entry compat; do
+while read nr _abi name entry _compat; do
     if [ $nr -ge 512 ] ; then # discard compat sycalls
         break
     fi
index 16e23b7..f209ce2 100644 (file)
@@ -20,7 +20,5 @@ void perf_regs_load(u64 *regs);
 #define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT)
 #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
 #endif
-#define PERF_REG_IP PERF_REG_X86_IP
-#define PERF_REG_SP PERF_REG_X86_SP
 
 #endif /* ARCH_PERF_REGS_H */
index cbd5821..b1ce0c5 100644 (file)
@@ -75,11 +75,12 @@ int arch_evlist__add_default_attrs(struct evlist *evlist,
 
 int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
 {
-       if (topdown_sys_has_perf_metrics() && evsel__sys_has_perf_metrics(lhs)) {
+       if (topdown_sys_has_perf_metrics() &&
+           (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) {
                /* Ensure the topdown slots comes first. */
-               if (strcasestr(lhs->name, "slots"))
+               if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots"))
                        return -1;
-               if (strcasestr(rhs->name, "slots"))
+               if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots"))
                        return 1;
                /* Followed by topdown events. */
                if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown"))
index 81d2265..090d0f3 100644 (file)
@@ -40,12 +40,11 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
 
 bool arch_evsel__must_be_in_group(const struct evsel *evsel)
 {
-       if (!evsel__sys_has_perf_metrics(evsel))
+       if (!evsel__sys_has_perf_metrics(evsel) || !evsel->name ||
+           strcasestr(evsel->name, "uops_retired.slots"))
                return false;
 
-       return evsel->name &&
-               (strcasestr(evsel->name, "slots") ||
-                strcasestr(evsel->name, "topdown"));
+       return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots");
 }
 
 int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
index 74b70fd..3180779 100644 (file)
@@ -60,8 +60,7 @@ struct intel_pt_recording {
        size_t                          priv_size;
 };
 
-static int intel_pt_parse_terms_with_default(const char *pmu_name,
-                                            struct list_head *formats,
+static int intel_pt_parse_terms_with_default(struct perf_pmu *pmu,
                                             const char *str,
                                             u64 *config)
 {
@@ -75,13 +74,12 @@ static int intel_pt_parse_terms_with_default(const char *pmu_name,
 
        INIT_LIST_HEAD(terms);
 
-       err = parse_events_terms(terms, str);
+       err = parse_events_terms(terms, str, /*input=*/ NULL);
        if (err)
                goto out_free;
 
        attr.config = *config;
-       err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true,
-                                    NULL);
+       err = perf_pmu__config_terms(pmu, &attr, terms, /*zero=*/true, /*err=*/NULL);
        if (err)
                goto out_free;
 
@@ -91,12 +89,10 @@ out_free:
        return err;
 }
 
-static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats,
-                               const char *str, u64 *config)
+static int intel_pt_parse_terms(struct perf_pmu *pmu, const char *str, u64 *config)
 {
        *config = 0;
-       return intel_pt_parse_terms_with_default(pmu_name, formats, str,
-                                                config);
+       return intel_pt_parse_terms_with_default(pmu, str, config);
 }
 
 static u64 intel_pt_masked_bits(u64 mask, u64 bits)
@@ -126,7 +122,7 @@ static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
 
        *res = 0;
 
-       mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
+       mask = perf_pmu__format_bits(intel_pt_pmu, str);
        if (!mask)
                return -EINVAL;
 
@@ -236,8 +232,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
 
        pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
 
-       intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf,
-                            &config);
+       intel_pt_parse_terms(intel_pt_pmu, buf, &config);
 
        close(dirfd);
        return config;
@@ -348,16 +343,11 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
        if (priv_size != ptr->priv_size)
                return -EINVAL;
 
-       intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
-                            "tsc", &tsc_bit);
-       intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
-                            "noretcomp", &noretcomp_bit);
-       intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
-                            "mtc", &mtc_bit);
-       mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
-                                             "mtc_period");
-       intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
-                            "cyc", &cyc_bit);
+       intel_pt_parse_terms(intel_pt_pmu, "tsc", &tsc_bit);
+       intel_pt_parse_terms(intel_pt_pmu, "noretcomp", &noretcomp_bit);
+       intel_pt_parse_terms(intel_pt_pmu, "mtc", &mtc_bit);
+       mtc_freq_bits = perf_pmu__format_bits(intel_pt_pmu, "mtc_period");
+       intel_pt_parse_terms(intel_pt_pmu, "cyc", &cyc_bit);
 
        intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
 
@@ -511,7 +501,7 @@ static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, int dirfd,
 
        valid |= 1;
 
-       bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);
+       bits = perf_pmu__format_bits(intel_pt_pmu, name);
 
        config &= bits;
 
@@ -781,8 +771,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
                intel_pt_evsel->core.attr.aux_watermark = aux_watermark;
        }
 
-       intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
-                            "tsc", &tsc_bit);
+       intel_pt_parse_terms(intel_pt_pmu, "tsc", &tsc_bit);
 
        if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
                have_timing_info = true;
index a8a782b..191b372 100644 (file)
@@ -52,7 +52,7 @@ bool is_mem_loads_aux_event(struct evsel *leader)
        return leader->core.attr.config == MEM_LOADS_AUX;
 }
 
-char *perf_mem_events__name(int i, char *pmu_name)
+const char *perf_mem_events__name(int i, const char *pmu_name)
 {
        struct perf_mem_event *e = perf_mem_events__ptr(i);
 
@@ -65,7 +65,7 @@ char *perf_mem_events__name(int i, char *pmu_name)
 
                if (!pmu_name) {
                        mem_loads_name__init = true;
-                       pmu_name = (char *)"cpu";
+                       pmu_name = "cpu";
                }
 
                if (perf_pmus__have_event(pmu_name, "mem-loads-aux")) {
@@ -82,12 +82,12 @@ char *perf_mem_events__name(int i, char *pmu_name)
 
        if (i == PERF_MEM_EVENTS__STORE) {
                if (!pmu_name)
-                       pmu_name = (char *)"cpu";
+                       pmu_name = "cpu";
 
                scnprintf(mem_stores_name, sizeof(mem_stores_name),
                          e->name, pmu_name);
                return mem_stores_name;
        }
 
-       return (char *)e->name;
+       return e->name;
 }
index 8ad4112..b813502 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 
+#include "perf_regs.h"
 #include "../../../perf-sys.h"
 #include "../../../util/perf_regs.h"
 #include "../../../util/debug.h"
@@ -317,3 +318,8 @@ uint64_t arch__intr_reg_mask(void)
 
        return PERF_REGS_MASK;
 }
+
+uint64_t arch__user_reg_mask(void)
+{
+       return PERF_REGS_MASK;
+}
index 65d8cdf..f428cff 100644 (file)
@@ -126,7 +126,7 @@ close_dir:
        return ret;
 }
 
-static char *__pmu_find_real_name(const char *name)
+static const char *__pmu_find_real_name(const char *name)
 {
        struct pmu_alias *pmu_alias;
 
@@ -135,10 +135,10 @@ static char *__pmu_find_real_name(const char *name)
                        return pmu_alias->name;
        }
 
-       return (char *)name;
+       return name;
 }
 
-char *pmu_find_real_name(const char *name)
+const char *pmu_find_real_name(const char *name)
 {
        if (cached_list)
                return __pmu_find_real_name(name);
@@ -149,7 +149,7 @@ char *pmu_find_real_name(const char *name)
        return __pmu_find_real_name(name);
 }
 
-static char *__pmu_find_alias_name(const char *name)
+static const char *__pmu_find_alias_name(const char *name)
 {
        struct pmu_alias *pmu_alias;
 
@@ -160,7 +160,7 @@ static char *__pmu_find_alias_name(const char *name)
        return NULL;
 }
 
-char *pmu_find_alias_name(const char *name)
+const char *pmu_find_alias_name(const char *name)
 {
        if (cached_list)
                return __pmu_find_alias_name(name);
index ef71e8b..edb77e2 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <elfutils/libdwfl.h>
+#include "perf_regs.h"
 #include "../../../util/unwind-libdw.h"
 #include "../../../util/perf_regs.h"
 #include "util/sample.h"
index 07bbc44..c2ab309 100644 (file)
@@ -17,6 +17,7 @@ perf-y += inject-buildid.o
 perf-y += evlist-open-close.o
 perf-y += breakpoint.o
 perf-y += pmu-scan.o
+perf-y += uprobe.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
index a0625c7..faa18e6 100644 (file)
@@ -43,6 +43,9 @@ int bench_inject_build_id(int argc, const char **argv);
 int bench_evlist_open_close(int argc, const char **argv);
 int bench_breakpoint_thread(int argc, const char **argv);
 int bench_breakpoint_enable(int argc, const char **argv);
+int bench_uprobe_baseline(int argc, const char **argv);
+int bench_uprobe_empty(int argc, const char **argv);
+int bench_uprobe_trace_printk(int argc, const char **argv);
 int bench_pmu_scan(int argc, const char **argv);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
index 41385f8..dfd18f5 100644 (file)
@@ -47,6 +47,7 @@ struct breakpoint {
 static int breakpoint_setup(void *addr)
 {
        struct perf_event_attr attr = { .size = 0, };
+       int fd;
 
        attr.type = PERF_TYPE_BREAKPOINT;
        attr.size = sizeof(attr);
@@ -56,7 +57,12 @@ static int breakpoint_setup(void *addr)
        attr.bp_addr = (unsigned long)addr;
        attr.bp_type = HW_BREAKPOINT_RW;
        attr.bp_len = HW_BREAKPOINT_LEN_1;
-       return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+       fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+
+       if (fd < 0)
+               fd = -errno;
+
+       return fd;
 }
 
 static void *passive_thread(void *arg)
@@ -122,8 +128,14 @@ int bench_breakpoint_thread(int argc, const char **argv)
 
        for (i = 0; i < thread_params.nbreakpoints; i++) {
                breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched);
-               if (breakpoints[i].fd == -1)
+
+               if (breakpoints[i].fd < 0) {
+                       if (breakpoints[i].fd == -ENODEV) {
+                               printf("Skipping perf bench breakpoint thread: No hardware support\n");
+                               return 0;
+                       }
                        exit((perror("perf_event_open"), EXIT_FAILURE));
+               }
        }
        gettimeofday(&start, NULL);
        for (i = 0; i < thread_params.nparallel; i++) {
@@ -196,8 +208,14 @@ int bench_breakpoint_enable(int argc, const char **argv)
                exit(EXIT_FAILURE);
        }
        fd = breakpoint_setup(&watched);
-       if (fd == -1)
+
+       if (fd < 0) {
+               if (fd == -ENODEV) {
+                       printf("Skipping perf bench breakpoint enable: No hardware support\n");
+                       return 0;
+               }
                exit((perror("perf_event_open"), EXIT_FAILURE));
+       }
        nthreads = enable_params.npassive + enable_params.nactive;
        threads = calloc(nthreads, sizeof(threads[0]));
        if (!threads)
index c7d207f..9e4d364 100644 (file)
@@ -57,9 +57,7 @@ static int save_result(void)
                r->is_core = pmu->is_core;
                r->nr_caps = pmu->nr_caps;
 
-               r->nr_aliases = 0;
-               list_for_each(list, &pmu->aliases)
-                       r->nr_aliases++;
+               r->nr_aliases = perf_pmu__num_events(pmu);
 
                r->nr_formats = 0;
                list_for_each(list, &pmu->format)
@@ -98,9 +96,7 @@ static int check_result(bool core_only)
                        return -1;
                }
 
-               nr = 0;
-               list_for_each(list, &pmu->aliases)
-                       nr++;
+               nr = perf_pmu__num_events(pmu);
                if (nr != r->nr_aliases) {
                        pr_err("Unmatched number of event aliases in %s: expect %d vs got %d\n",
                                pmu->name, r->nr_aliases, nr);
diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c
new file mode 100644 (file)
index 0000000..914c081
--- /dev/null
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * uprobe.c
+ *
+ * uprobe benchmarks
+ *
+ *  Copyright (C) 2023, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+#include <linux/compiler.h>
+#include <linux/time64.h>
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+/* Number of timed usleep() calls unless overridden with -l/--loop. */
+#define LOOPS_DEFAULT 1000
+static int loops = LOOPS_DEFAULT;
+
+/*
+ * Benchmark variant selected by the public entry points:
+ * BASELINE attaches nothing, EMPTY and TRACE_PRINTK attach the skeleton
+ * program of the same name before the timed loop runs.
+ */
+enum bench_uprobe {
+        BENCH_UPROBE__BASELINE,
+        BENCH_UPROBE__EMPTY,
+        BENCH_UPROBE__TRACE_PRINTK,
+};
+
+/* Command line options: only the loop count is configurable. */
+static const struct option options[] = {
+       OPT_INTEGER('l', "loop",        &loops,         "Specify number of loops"),
+       OPT_END()
+};
+
+/* NULL-terminated usage strings handed to parse_options(). */
+static const char * const bench_uprobe_usage[] = {
+       "perf bench uprobe <options>",
+       NULL
+};
+
+#ifdef HAVE_BPF_SKEL
+#include "bpf_skel/bench_uprobe.skel.h"
+
+/*
+ * Attach skeleton program @prog as a uprobe (pid -1, offset 0, options taken
+ * from 'uprobe_opts') and store the resulting link in skel->links.@prog.
+ *
+ * Relies on the caller's scope providing 'skel', 'uprobe_opts', an 'int err'
+ * and a 'cleanup' label: on failure it sets err to -errno, reports the error
+ * on stderr and jumps to 'cleanup'.
+ *
+ * Wrapped in do { } while (0) so the expansion is a single statement and is
+ * safe under an unbraced if/else.
+ *
+ * NOTE(review): the libc path is hard-coded; confirm behaviour on systems
+ * that install libc somewhere other than /lib64.
+ */
+#define bench_uprobe__attach_uprobe(prog) \
+       do { \
+               skel->links.prog = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.prog, \
+                                                                  /*pid=*/-1, \
+                                                                  /*binary_path=*/"/lib64/libc.so.6", \
+                                                                  /*func_offset=*/0, \
+                                                                  /*opts=*/&uprobe_opts); \
+               if (!skel->links.prog) { \
+                       err = -errno; \
+                       fprintf(stderr, "Failed to attach bench uprobe \"%s\": %s\n", #prog, strerror(errno)); \
+                       goto cleanup; \
+               } \
+       } while (0)
+
+/* Skeleton instance shared by setup and teardown; NULL when none is live. */
+struct bench_uprobe_bpf *skel;
+
+/*
+ * Open and load the bench_uprobe BPF skeleton and, for the EMPTY and
+ * TRACE_PRINTK variants, attach the matching program as a uprobe on "usleep".
+ *
+ * Returns 0 on success. On failure returns -1 (open failed or unknown
+ * variant), the error from bench_uprobe_bpf__load(), or -errno from a failed
+ * attach. Every failure path destroys the skeleton and resets 'skel' to NULL
+ * so that a later teardown cannot destroy it a second time.
+ */
+static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench)
+{
+       DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+       int err;
+
+       /* Open the BPF application */
+       skel = bench_uprobe_bpf__open();
+       if (!skel) {
+               fprintf(stderr, "Failed to open and load uprobes bench BPF skeleton\n");
+               return -1;
+       }
+
+       /* Load and verify it */
+       err = bench_uprobe_bpf__load(skel);
+       if (err) {
+               fprintf(stderr, "Failed to load and verify BPF skeleton\n");
+               goto cleanup;
+       }
+
+       uprobe_opts.func_name = "usleep";
+       switch (bench) {
+       case BENCH_UPROBE__BASELINE:                                                    break;
+       case BENCH_UPROBE__EMPTY:        bench_uprobe__attach_uprobe(empty);            break;
+       case BENCH_UPROBE__TRACE_PRINTK: bench_uprobe__attach_uprobe(trace_printk);     break;
+       default:
+               fprintf(stderr, "Invalid bench: %d\n", bench);
+               /* err is still 0 from the successful load; report a failure. */
+               err = -1;
+               goto cleanup;
+       }
+
+       return err;
+cleanup:
+       bench_uprobe_bpf__destroy(skel);
+       skel = NULL;
+       return err;
+}
+
+/* Destroy the skeleton held in 'skel', if any, and clear the pointer. */
+static void bench_uprobe__teardown_bpf_skel(void)
+{
+       if (skel == NULL)
+               return;
+
+       bench_uprobe_bpf__destroy(skel);
+       skel = NULL;
+}
+#else
+/*
+ * Built without HAVE_BPF_SKEL: nothing to set up or tear down. Setup reports
+ * success, so every variant then times the plain usleep() loop.
+ */
+static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench __maybe_unused) { return 0; }
+static void bench_uprobe__teardown_bpf_skel(void) {}
+#endif
+
+/*
+ * Print one result in the default, human readable format to @fp.
+ *
+ * @name: description of the timed call, printed in the header line
+ * @unit: time unit of @diff, printed with an "s" suffix
+ * @diff: total elapsed time for 'loops' calls
+ * @fp:   stream that receives all of the output
+ *
+ * The first non-zero result seen is remembered as the baseline. Later calls
+ * also print the delta to that baseline and, when the previous result differs
+ * from the baseline, the delta to the previous result.
+ *
+ * Returns the sum of the fprintf() return values plus one for the final
+ * newline.
+ */
+static int bench_uprobe_format__default_fprintf(const char *name, const char *unit, u64 diff, FILE *fp)
+{
+       static u64 baseline, previous;
+       s64 diff_to_baseline = diff - baseline,
+           diff_to_previous = diff - previous;
+       int printed = fprintf(fp, "# Executed %'d %s calls\n", loops, name);
+
+       printed += fprintf(fp, " %14s: %'" PRIu64 " %ss", "Total time", diff, unit);
+
+       if (baseline) {
+               printed += fprintf(fp, " %s%'" PRId64 " to baseline", diff_to_baseline > 0 ? "+" : "", diff_to_baseline);
+
+               /* Goes to @fp like the rest of the report, and is counted. */
+               if (previous != baseline)
+                       printed += fprintf(fp, " %s%'" PRId64 " to previous", diff_to_previous > 0 ? "+" : "", diff_to_previous);
+       }
+
+       printed += fprintf(fp, "\n\n %'.3f %ss/op", (double)diff / (double)loops, unit);
+
+       if (baseline) {
+               printed += fprintf(fp, " %'.3f %ss/op to baseline", (double)diff_to_baseline / (double)loops, unit);
+
+               if (previous != baseline)
+                       printed += fprintf(fp, " %'.3f %ss/op to previous", (double)diff_to_previous / (double)loops, unit);
+       } else {
+               baseline = diff;
+       }
+
+       fputc('\n', fp);
+
+       previous = diff;
+
+       return printed + 1;
+}
+
+/*
+ * Run one uprobe benchmark variant: time 'loops' calls of
+ * usleep(USEC_PER_MSEC) and report the total in microseconds using the
+ * format selected by 'bench_format'.
+ *
+ * For every variant other than BASELINE the BPF skeleton is set up before the
+ * timed loop and torn down after the result is printed.
+ *
+ * Returns 0, including when the BPF setup fails: setup has already reported
+ * the error and the benchmark is skipped. An unknown output format exits the
+ * process.
+ */
+static int bench_uprobe(int argc, const char **argv, enum bench_uprobe bench)
+{
+       const char *name = "usleep(1000)", *unit = "usec";
+       struct timespec start, end;
+       u64 diff;
+       int i;
+
+       argc = parse_options(argc, argv, options, bench_uprobe_usage, 0);
+
+       if (bench != BENCH_UPROBE__BASELINE && bench_uprobe__setup_bpf_skel(bench) < 0)
+               return 0;
+
+       /*
+        * Use the monotonic clock: the wall clock can be stepped while the
+        * loop runs, and a backward step would underflow the unsigned diff.
+        */
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       for (i = 0; i < loops; i++) {
+               usleep(USEC_PER_MSEC);
+       }
+
+       clock_gettime(CLOCK_MONOTONIC, &end);
+
+       /* Elapsed nanoseconds, then scaled down to microseconds. */
+       diff = end.tv_sec * NSEC_PER_SEC + end.tv_nsec - (start.tv_sec * NSEC_PER_SEC + start.tv_nsec);
+       diff /= NSEC_PER_USEC;
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               bench_uprobe_format__default_fprintf(name, unit, diff, stdout);
+               break;
+
+       case BENCH_FORMAT_SIMPLE:
+               printf("%" PRIu64 "\n", diff);
+               break;
+
+       default:
+               /* reaching here is something of a disaster */
+               fprintf(stderr, "Unknown format:%d\n", bench_format);
+               exit(1);
+       }
+
+       if (bench != BENCH_UPROBE__BASELINE)
+               bench_uprobe__teardown_bpf_skel();
+
+       return 0;
+}
+
+/* Entry point for the "baseline" benchmark: times usleep() with no BPF setup. */
+int bench_uprobe_baseline(int argc, const char **argv)
+{
+       return bench_uprobe(argc, argv, BENCH_UPROBE__BASELINE);
+}
+
+/* Entry point for the "empty" benchmark: runs bench_uprobe() with the EMPTY variant. */
+int bench_uprobe_empty(int argc, const char **argv)
+{
+       return bench_uprobe(argc, argv, BENCH_UPROBE__EMPTY);
+}
+
+/* Entry point for the "trace_printk" benchmark: runs bench_uprobe() with the TRACE_PRINTK variant. */
+int bench_uprobe_trace_printk(int argc, const char **argv)
+{
+       return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK);
+}
index 5033e8b..1a8898d 100644 (file)
@@ -105,6 +105,13 @@ static struct bench breakpoint_benchmarks[] = {
        { NULL, NULL, NULL },
 };
 
+/* Benchmarks listed under the "uprobe" collection; the all-NULL entry ends the table. */
+static struct bench uprobe_benchmarks[] = {
+       { "baseline",   "Baseline libc usleep(1000) call",                              bench_uprobe_baseline,  },
+       { "empty",      "Attach empty BPF prog to uprobe on usleep, system wide",       bench_uprobe_empty,     },
+       { "trace_printk", "Attach trace_printk BPF prog to uprobe on usleep syswide",   bench_uprobe_trace_printk,      },
+       { NULL, NULL, NULL },
+};
+
 struct collection {
        const char      *name;
        const char      *summary;
@@ -124,6 +131,7 @@ static struct collection collections[] = {
 #endif
        { "internals",  "Perf-internals benchmarks",                    internals_benchmarks    },
        { "breakpoint", "Breakpoint benchmarks",                        breakpoint_benchmarks   },
+       { "uprobe",     "uprobe benchmarks",                            uprobe_benchmarks       },
        { "all",        "All benchmarks",                               NULL                    },
        { NULL,         NULL,                                           NULL                    }
 };
index e8a1b16..57d300d 100644 (file)
@@ -1915,8 +1915,8 @@ static int data_init(int argc, const char **argv)
                struct perf_data *data = &d->data;
 
                data->path  = use_default ? defaults[i] : argv[i];
-               data->mode  = PERF_DATA_MODE_READ,
-               data->force = force,
+               data->mode  = PERF_DATA_MODE_READ;
+               data->force = force;
 
                d->idx  = i;
        }
index 7fec2cc..a343823 100644 (file)
@@ -145,9 +145,20 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
                putchar('\n');
 
        if (desc && print_state->desc) {
+               char *desc_with_unit = NULL;
+               int desc_len = -1;
+
+               if (pmu_name && strcmp(pmu_name, "default_core")) {
+                       desc_len = strlen(desc);
+                       desc_len = asprintf(&desc_with_unit,
+                                           desc[desc_len - 1] != '.'
+                                             ? "%s. Unit: %s" : "%s Unit: %s",
+                                           desc, pmu_name);
+               }
                printf("%*s", 8, "[");
-               wordwrap(desc, 8, pager_get_columns(), 0);
+               wordwrap(desc_len > 0 ? desc_with_unit : desc, 8, pager_get_columns(), 0);
                printf("]\n");
+               free(desc_with_unit);
        }
        long_desc = long_desc ?: desc;
        if (long_desc && print_state->long_desc) {
@@ -423,6 +434,13 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
        strbuf_release(&buf);
 }
 
+/*
+ * skip_duplicate_pmus callback of the default printer: @ps is its
+ * struct print_state. Returns true unless long_desc is set in that state.
+ */
+static bool default_skip_duplicate_pmus(void *ps)
+{
+       const struct print_state *state = ps;
+
+       return state->long_desc ? false : true;
+}
+
 int cmd_list(int argc, const char **argv)
 {
        int i, ret = 0;
@@ -434,6 +452,7 @@ int cmd_list(int argc, const char **argv)
                .print_end = default_print_end,
                .print_event = default_print_event,
                .print_metric = default_print_metric,
+               .skip_duplicate_pmus = default_skip_duplicate_pmus,
        };
        const char *cputype = NULL;
        const char *unit_name = NULL;
@@ -502,7 +521,7 @@ int cmd_list(int argc, const char **argv)
                                ret = -1;
                                goto out;
                        }
-                       default_ps.pmu_glob = pmu->name;
+                       default_ps.pmu_glob = strdup(pmu->name);
                }
        }
        print_cb.print_start(ps);
index c15386c..b141f21 100644 (file)
@@ -2052,6 +2052,7 @@ static int __cmd_contention(int argc, const char **argv)
        if (IS_ERR(session)) {
                pr_err("Initializing perf session failed\n");
                err = PTR_ERR(session);
+               session = NULL;
                goto out_delete;
        }
 
@@ -2506,7 +2507,7 @@ int cmd_lock(int argc, const char **argv)
        OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
                     "Max number of BPF map entries", parse_map_entry),
        OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
-                    "Set the maximum stack depth when collecting lopck contention, "
+                    "Set the maximum stack depth when collecting lock contention, "
                     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
        OPT_INTEGER(0, "stack-skip", &stack_skip,
                    "Set the number of stack depth to skip when finding a lock caller, "
index aec18db..34bb31f 100644 (file)
@@ -37,8 +37,6 @@
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
 #include "util/perf_api_probe.h"
-#include "util/llvm-utils.h"
-#include "util/bpf-loader.h"
 #include "util/trigger.h"
 #include "util/perf-hooks.h"
 #include "util/cpu-set-sched.h"
@@ -2465,16 +2463,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
                }
        }
 
-       err = bpf__apply_obj_config();
-       if (err) {
-               char errbuf[BUFSIZ];
-
-               bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
-               pr_err("ERROR: Apply config to BPF failed: %s\n",
-                        errbuf);
-               goto out_free_threads;
-       }
-
        /*
         * Normally perf_session__new would do this, but it doesn't have the
         * evlist.
@@ -3486,10 +3474,6 @@ static struct option __record_options[] = {
                    "collect kernel callchains"),
        OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
                    "collect user callchains"),
-       OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
-                  "clang binary to use for compiling BPF scriptlets"),
-       OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
-                  "options passed to clang when compiling BPF scriptlets"),
        OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
                   "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
@@ -3967,27 +3951,6 @@ int cmd_record(int argc, const char **argv)
 
        setlocale(LC_ALL, "");
 
-#ifndef HAVE_LIBBPF_SUPPORT
-# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
-       set_nobuild('\0', "clang-path", true);
-       set_nobuild('\0', "clang-opt", true);
-# undef set_nobuild
-#endif
-
-#ifndef HAVE_BPF_PROLOGUE
-# if !defined (HAVE_DWARF_SUPPORT)
-#  define REASON  "NO_DWARF=1"
-# elif !defined (HAVE_LIBBPF_SUPPORT)
-#  define REASON  "NO_LIBBPF=1"
-# else
-#  define REASON  "this architecture doesn't support BPF prologue"
-# endif
-# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
-       set_nobuild('\0', "vmlinux", true);
-# undef set_nobuild
-# undef REASON
-#endif
-
 #ifndef HAVE_BPF_SKEL
 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
        set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
@@ -4116,14 +4079,6 @@ int cmd_record(int argc, const char **argv)
        if (dry_run)
                goto out;
 
-       err = bpf__setup_stdout(rec->evlist);
-       if (err) {
-               bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
-               pr_err("ERROR: Setup BPF stdout failed: %s\n",
-                        errbuf);
-               goto out;
-       }
-
        err = -ENOMEM;
 
        if (rec->no_buildid_cache || rec->no_buildid) {
index 200b3e7..517bf25 100644 (file)
@@ -2199,6 +2199,17 @@ static void process_event(struct perf_script *script,
        if (PRINT_FIELD(RETIRE_LAT))
                fprintf(fp, "%16" PRIu16, sample->retire_lat);
 
+       if (PRINT_FIELD(CGROUP)) {
+               const char *cgrp_name;
+               struct cgroup *cgrp = cgroup__find(machine->env,
+                                                  sample->cgroup);
+               if (cgrp != NULL)
+                       cgrp_name = cgrp->name;
+               else
+                       cgrp_name = "unknown";
+               fprintf(fp, " %s", cgrp_name);
+       }
+
        if (PRINT_FIELD(IP)) {
                struct callchain_cursor *cursor = NULL;
 
@@ -2243,17 +2254,6 @@ static void process_event(struct perf_script *script,
        if (PRINT_FIELD(CODE_PAGE_SIZE))
                fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str));
 
-       if (PRINT_FIELD(CGROUP)) {
-               const char *cgrp_name;
-               struct cgroup *cgrp = cgroup__find(machine->env,
-                                                  sample->cgroup);
-               if (cgrp != NULL)
-                       cgrp_name = cgrp->name;
-               else
-                       cgrp_name = "unknown";
-               fprintf(fp, " %s", cgrp_name);
-       }
-
        perf_sample__fprintf_ipc(sample, attr, fp);
 
        fprintf(fp, "\n");
index 1baa2ac..ea8c7ec 100644 (file)
@@ -1805,6 +1805,7 @@ int cmd_top(int argc, const char **argv)
        top.session = perf_session__new(NULL, NULL);
        if (IS_ERR(top.session)) {
                status = PTR_ERR(top.session);
+               top.session = NULL;
                goto out_delete_evlist;
        }
 
index 6e73d0e..e541d0e 100644 (file)
 #include <api/fs/tracing_path.h>
 #ifdef HAVE_LIBBPF_SUPPORT
 #include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#ifdef HAVE_BPF_SKEL
+#include "bpf_skel/augmented_raw_syscalls.skel.h"
+#endif
 #endif
 #include "util/bpf_map.h"
 #include "util/rlimit.h"
@@ -53,7 +57,6 @@
 #include "trace/beauty/beauty.h"
 #include "trace-event.h"
 #include "util/parse-events.h"
-#include "util/bpf-loader.h"
 #include "util/tracepoint.h"
 #include "callchain.h"
 #include "print_binary.h"
@@ -127,25 +130,19 @@ struct trace {
        struct syscalltbl       *sctbl;
        struct {
                struct syscall  *table;
-               struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
-                       struct bpf_map  *sys_enter,
-                                       *sys_exit;
-               }               prog_array;
                struct {
                        struct evsel *sys_enter,
-                                         *sys_exit,
-                                         *augmented;
+                               *sys_exit,
+                               *bpf_output;
                }               events;
-               struct bpf_program *unaugmented_prog;
        } syscalls;
-       struct {
-               struct bpf_map *map;
-       } dump;
+#ifdef HAVE_BPF_SKEL
+       struct augmented_raw_syscalls_bpf *skel;
+#endif
        struct record_opts      opts;
        struct evlist   *evlist;
        struct machine          *host;
        struct thread           *current;
-       struct bpf_object       *bpf_obj;
        struct cgroup           *cgroup;
        u64                     base_time;
        FILE                    *output;
@@ -415,6 +412,7 @@ static int evsel__init_syscall_tp(struct evsel *evsel)
                if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
                    evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
                        return -ENOENT;
+
                return 0;
        }
 
@@ -1296,6 +1294,22 @@ static struct thread_trace *thread_trace__new(void)
        return ttrace;
 }
 
+static void thread_trace__free_files(struct thread_trace *ttrace);
+
+/*
+ * Free a struct thread_trace together with its syscall stats list, its file
+ * table and its entry string. A NULL pointer is ignored. Takes a void * so it
+ * can be passed to thread__set_priv_destructor().
+ */
+static void thread_trace__delete(void *pttrace)
+{
+       struct thread_trace *tt = pttrace;
+
+       if (tt == NULL)
+               return;
+
+       intlist__delete(tt->syscall_stats);
+       tt->syscall_stats = NULL;
+
+       thread_trace__free_files(tt);
+       zfree(&tt->entry_str);
+
+       free(tt);
+}
+
 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
 {
        struct thread_trace *ttrace;
@@ -1333,6 +1347,17 @@ void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
 
 static const size_t trace__entry_str_size = 2048;
 
+/*
+ * Free the pathname of each file table entry below files.max, then the table
+ * itself, and reset files.max to -1.
+ *
+ * NOTE(review): the -1 reset value suggests files.max may be the highest
+ * valid index rather than an entry count; if so this loop stops one entry
+ * short. Confirm against the code that grows the table.
+ */
+static void thread_trace__free_files(struct thread_trace *ttrace)
+{
+       int idx;
+
+       for (idx = 0; idx < ttrace->files.max; idx++)
+               zfree(&ttrace->files.table[idx].pathname);
+
+       zfree(&ttrace->files.table);
+       ttrace->files.max  = -1;
+}
+
 static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
 {
        if (fd < 0)
@@ -1635,6 +1660,8 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
        if (trace->host == NULL)
                return -ENOMEM;
 
+       thread__set_priv_destructor(thread_trace__delete);
+
        err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
        if (err < 0)
                goto out;
@@ -2816,7 +2843,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
        if (thread)
                trace__fprintf_comm_tid(trace, thread, trace->output);
 
-       if (evsel == trace->syscalls.events.augmented) {
+       if (evsel == trace->syscalls.events.bpf_output) {
                int id = perf_evsel__sc_tp_uint(evsel, id, sample);
                struct syscall *sc = trace__syscall_info(trace, evsel, id);
 
@@ -3136,13 +3163,8 @@ static void evlist__free_syscall_tp_fields(struct evlist *evlist)
        struct evsel *evsel;
 
        evlist__for_each_entry(evlist, evsel) {
-               struct evsel_trace *et = evsel->priv;
-
-               if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls"))
-                       continue;
-
-               zfree(&et->fmt);
-               free(et);
+               evsel_trace__delete(evsel->priv);
+               evsel->priv = NULL;
        }
 }
 
@@ -3254,35 +3276,16 @@ out_enomem:
        goto out;
 }
 
-#ifdef HAVE_LIBBPF_SUPPORT
-static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
-{
-       if (trace->bpf_obj == NULL)
-               return NULL;
-
-       return bpf_object__find_map_by_name(trace->bpf_obj, name);
-}
-
-static void trace__set_bpf_map_filtered_pids(struct trace *trace)
-{
-       trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
-}
-
-static void trace__set_bpf_map_syscalls(struct trace *trace)
-{
-       trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
-       trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
-}
-
+#ifdef HAVE_BPF_SKEL
 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
 {
        struct bpf_program *pos, *prog = NULL;
        const char *sec_name;
 
-       if (trace->bpf_obj == NULL)
+       if (trace->skel->obj == NULL)
                return NULL;
 
-       bpf_object__for_each_program(pos, trace->bpf_obj) {
+       bpf_object__for_each_program(pos, trace->skel->obj) {
                sec_name = bpf_program__section_name(pos);
                if (sec_name && !strcmp(sec_name, name)) {
                        prog = pos;
@@ -3300,12 +3303,12 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
 
        if (prog_name == NULL) {
                char default_prog_name[256];
-               scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->name);
+               scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->name);
                prog = trace__find_bpf_program_by_title(trace, default_prog_name);
                if (prog != NULL)
                        goto out_found;
                if (sc->fmt && sc->fmt->alias) {
-                       scnprintf(default_prog_name, sizeof(default_prog_name), "!syscalls:sys_%s_%s", type, sc->fmt->alias);
+                       scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s", type, sc->fmt->alias);
                        prog = trace__find_bpf_program_by_title(trace, default_prog_name);
                        if (prog != NULL)
                                goto out_found;
@@ -3323,7 +3326,7 @@ out_found:
        pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
                 prog_name, type, sc->name);
 out_unaugmented:
-       return trace->syscalls.unaugmented_prog;
+       return trace->skel->progs.syscall_unaugmented;
 }
 
 static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
@@ -3340,13 +3343,13 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
 static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
 {
        struct syscall *sc = trace__syscall_info(trace, NULL, id);
-       return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+       return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
 }
 
 static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
 {
        struct syscall *sc = trace__syscall_info(trace, NULL, id);
-       return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
+       return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
 }
 
 static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
@@ -3371,7 +3374,7 @@ try_to_find_pair:
                bool is_candidate = false;
 
                if (pair == NULL || pair == sc ||
-                   pair->bpf_prog.sys_enter == trace->syscalls.unaugmented_prog)
+                   pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
                        continue;
 
                for (field = sc->args, candidate_field = pair->args;
@@ -3395,6 +3398,19 @@ try_to_find_pair:
                        if (strcmp(field->type, candidate_field->type))
                                goto next_candidate;
 
+                       /*
+                        * This is limited in the BPF program but sys_write
+                        * uses "const char *" for its "buf" arg so we need to
+                        * use some heuristic that is kinda future proof...
+                        */
+                       if (strcmp(field->type, "const char *") == 0 &&
+                           !(strstr(field->name, "name") ||
+                             strstr(field->name, "path") ||
+                             strstr(field->name, "file") ||
+                             strstr(field->name, "root") ||
+                             strstr(field->name, "description")))
+                               goto next_candidate;
+
                        is_candidate = true;
                }
 
@@ -3424,7 +3440,7 @@ try_to_find_pair:
                 */
                if (pair_prog == NULL) {
                        pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter");
-                       if (pair_prog == trace->syscalls.unaugmented_prog)
+                       if (pair_prog == trace->skel->progs.syscall_unaugmented)
                                goto next_candidate;
                }
 
@@ -3439,8 +3455,8 @@ try_to_find_pair:
 
 static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
 {
-       int map_enter_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter),
-           map_exit_fd  = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
+       int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter);
+       int map_exit_fd  = bpf_map__fd(trace->skel->maps.syscalls_sys_exit);
        int err = 0, key;
 
        for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
@@ -3502,7 +3518,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
                 * For now we're just reusing the sys_enter prog, and if it
                 * already has an augmenter, we don't need to find one.
                 */
-               if (sc->bpf_prog.sys_enter != trace->syscalls.unaugmented_prog)
+               if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented)
                        continue;
 
                /*
@@ -3525,74 +3541,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
                        break;
        }
 
-
        return err;
 }
-
-static void trace__delete_augmented_syscalls(struct trace *trace)
-{
-       struct evsel *evsel, *tmp;
-
-       evlist__remove(trace->evlist, trace->syscalls.events.augmented);
-       evsel__delete(trace->syscalls.events.augmented);
-       trace->syscalls.events.augmented = NULL;
-
-       evlist__for_each_entry_safe(trace->evlist, tmp, evsel) {
-               if (evsel->bpf_obj == trace->bpf_obj) {
-                       evlist__remove(trace->evlist, evsel);
-                       evsel__delete(evsel);
-               }
-
-       }
-
-       bpf_object__close(trace->bpf_obj);
-       trace->bpf_obj = NULL;
-}
-#else // HAVE_LIBBPF_SUPPORT
-static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
-                                                  const char *name __maybe_unused)
-{
-       return NULL;
-}
-
-static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
-{
-}
-
-static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
-{
-}
-
-static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
-                                                           const char *name __maybe_unused)
-{
-       return NULL;
-}
-
-static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
-{
-       return 0;
-}
-
-static void trace__delete_augmented_syscalls(struct trace *trace __maybe_unused)
-{
-}
-#endif // HAVE_LIBBPF_SUPPORT
-
-static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
-{
-       struct evsel *evsel;
-
-       evlist__for_each_entry(trace->evlist, evsel) {
-               if (evsel == trace->syscalls.events.augmented ||
-                   evsel->bpf_obj == trace->bpf_obj)
-                       continue;
-
-               return false;
-       }
-
-       return true;
-}
+#endif // HAVE_BPF_SKEL
 
 static int trace__set_ev_qualifier_filter(struct trace *trace)
 {
@@ -3956,23 +3907,31 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        err = evlist__open(evlist);
        if (err < 0)
                goto out_error_open;
+#ifdef HAVE_BPF_SKEL
+       if (trace->syscalls.events.bpf_output) {
+               struct perf_cpu cpu;
 
-       err = bpf__apply_obj_config();
-       if (err) {
-               char errbuf[BUFSIZ];
-
-               bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
-               pr_err("ERROR: Apply config to BPF failed: %s\n",
-                        errbuf);
-               goto out_error_open;
+               /*
+                * Set up the __augmented_syscalls__ BPF map to hold for each
+                * CPU the bpf-output event's file descriptor.
+                */
+               perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) {
+                       bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__,
+                                       &cpu.cpu, sizeof(int),
+                                       xyarray__entry(trace->syscalls.events.bpf_output->core.fd,
+                                                      cpu.cpu, 0),
+                                       sizeof(__u32), BPF_ANY);
+               }
        }
-
+#endif
        err = trace__set_filter_pids(trace);
        if (err < 0)
                goto out_error_mem;
 
-       if (trace->syscalls.prog_array.sys_enter)
+#ifdef HAVE_BPF_SKEL
+       if (trace->skel && trace->skel->progs.sys_enter)
                trace__init_syscalls_bpf_prog_array_maps(trace);
+#endif
 
        if (trace->ev_qualifier_ids.nr > 0) {
                err = trace__set_ev_qualifier_filter(trace);
@@ -4005,9 +3964,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
        if (err < 0)
                goto out_error_apply_filters;
 
-       if (trace->dump.map)
-               bpf_map__fprintf(trace->dump.map, trace->output);
-
        err = evlist__mmap(evlist, trace->opts.mmap_pages);
        if (err < 0)
                goto out_error_mmap;
@@ -4704,6 +4660,18 @@ static void trace__exit(struct trace *trace)
        zfree(&trace->perfconfig_events);
 }
 
+#ifdef HAVE_BPF_SKEL
+/*
+ * Add the "__augmented_syscalls__" bpf-output event to @evlist by parsing
+ * its event string.
+ *
+ * Returns whatever parse_event() returned; a non-zero result is additionally
+ * reported through pr_debug().
+ */
+static int bpf__setup_bpf_output(struct evlist *evlist)
+{
+       int ret;
+
+       ret = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/");
+       if (ret != 0)
+               pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n");
+
+       return ret;
+}
+#endif
+
 int cmd_trace(int argc, const char **argv)
 {
        const char *trace_usage[] = {
@@ -4735,7 +4703,6 @@ int cmd_trace(int argc, const char **argv)
                .max_stack = UINT_MAX,
                .max_events = ULONG_MAX,
        };
-       const char *map_dump_str = NULL;
        const char *output_name = NULL;
        const struct option trace_options[] = {
        OPT_CALLBACK('e', "event", &trace, "event",
@@ -4769,9 +4736,6 @@ int cmd_trace(int argc, const char **argv)
        OPT_CALLBACK(0, "duration", &trace, "float",
                     "show only events with duration > N.M ms",
                     trace__set_duration),
-#ifdef HAVE_LIBBPF_SUPPORT
-       OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
-#endif
        OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
        OPT_INCR('v', "verbose", &verbose, "be more verbose"),
        OPT_BOOLEAN('T', "time", &trace.full_time,
@@ -4898,87 +4862,48 @@ int cmd_trace(int argc, const char **argv)
                                       "cgroup monitoring only available in system-wide mode");
        }
 
-       evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
-       if (IS_ERR(evsel)) {
-               bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
-               pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
-               goto out;
-       }
-
-       if (evsel) {
-               trace.syscalls.events.augmented = evsel;
+#ifdef HAVE_BPF_SKEL
+       if (!trace.trace_syscalls)
+               goto skip_augmentation;
 
-               evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
-               if (evsel == NULL) {
-                       pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
-                       goto out;
-               }
+       trace.skel = augmented_raw_syscalls_bpf__open();
+       if (!trace.skel) {
+               pr_debug("Failed to open augmented syscalls BPF skeleton");
+       } else {
+               /*
+                * Disable attaching the BPF programs except for sys_enter and
+                * sys_exit that tail call into this as necessary.
+                */
+               struct bpf_program *prog;
 
-               if (evsel->bpf_obj == NULL) {
-                       pr_err("ERROR: raw_syscalls:sys_enter not associated to a BPF object\n");
-                       goto out;
+               bpf_object__for_each_program(prog, trace.skel->obj) {
+                       if (prog != trace.skel->progs.sys_enter && prog != trace.skel->progs.sys_exit)
+                               bpf_program__set_autoattach(prog, /*autoattach=*/false);
                }
 
-               trace.bpf_obj = evsel->bpf_obj;
+               err = augmented_raw_syscalls_bpf__load(trace.skel);
 
-               /*
-                * If we have _just_ the augmenter event but don't have a
-                * explicit --syscalls, then assume we want all strace-like
-                * syscalls:
-                */
-               if (!trace.trace_syscalls && trace__only_augmented_syscalls_evsels(&trace))
-                       trace.trace_syscalls = true;
-               /*
-                * So, if we have a syscall augmenter, but trace_syscalls, aka
-                * strace-like syscall tracing is not set, then we need to trow
-                * away the augmenter, i.e. all the events that were created
-                * from that BPF object file.
-                *
-                * This is more to fix the current .perfconfig trace.add_events
-                * style of setting up the strace-like eBPF based syscall point
-                * payload augmenter.
-                *
-                * All this complexity will be avoided by adding an alternative
-                * to trace.add_events in the form of
-                * trace.bpf_augmented_syscalls, that will be only parsed if we
-                * need it.
-                *
-                * .perfconfig trace.add_events is still useful if we want, for
-                * instance, have msr_write.msr in some .perfconfig profile based
-                * 'perf trace --config determinism.profile' mode, where for some
-                * particular goal/workload type we want a set of events and
-                * output mode (with timings, etc) instead of having to add
-                * all via the command line.
-                *
-                * Also --config to specify an alternate .perfconfig file needs
-                * to be implemented.
-                */
-               if (!trace.trace_syscalls) {
-                       trace__delete_augmented_syscalls(&trace);
+               if (err < 0) {
+                       libbpf_strerror(err, bf, sizeof(bf));
+                       pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", bf);
                } else {
-                       trace__set_bpf_map_filtered_pids(&trace);
-                       trace__set_bpf_map_syscalls(&trace);
-                       trace.syscalls.unaugmented_prog = trace__find_bpf_program_by_title(&trace, "!raw_syscalls:unaugmented");
+                       augmented_raw_syscalls_bpf__attach(trace.skel);
+                       trace__add_syscall_newtp(&trace);
                }
        }
 
-       err = bpf__setup_stdout(trace.evlist);
+       err = bpf__setup_bpf_output(trace.evlist);
        if (err) {
-               bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
-               pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
+               libbpf_strerror(err, bf, sizeof(bf));
+               pr_err("ERROR: Setup BPF output event failed: %s\n", bf);
                goto out;
        }
-
+       trace.syscalls.events.bpf_output = evlist__last(trace.evlist);
+       assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__"));
+skip_augmentation:
+#endif
        err = -1;
 
-       if (map_dump_str) {
-               trace.dump.map = trace__find_bpf_map_by_name(&trace, map_dump_str);
-               if (trace.dump.map == NULL) {
-                       pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
-                       goto out;
-               }
-       }
-
        if (trace.trace_pgfaults) {
                trace.opts.sample_address = true;
                trace.opts.sample_time = true;
@@ -5029,7 +4954,7 @@ int cmd_trace(int argc, const char **argv)
         * buffers that are being copied from kernel to userspace, think 'read'
         * syscall.
         */
-       if (trace.syscalls.events.augmented) {
+       if (trace.syscalls.events.bpf_output) {
                evlist__for_each_entry(trace.evlist, evsel) {
                        bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
 
@@ -5038,9 +4963,9 @@ int cmd_trace(int argc, const char **argv)
                                goto init_augmented_syscall_tp;
                        }
 
-                       if (trace.syscalls.events.augmented->priv == NULL &&
+                       if (trace.syscalls.events.bpf_output->priv == NULL &&
                            strstr(evsel__name(evsel), "syscalls:sys_enter")) {
-                               struct evsel *augmented = trace.syscalls.events.augmented;
+                               struct evsel *augmented = trace.syscalls.events.bpf_output;
                                if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
                                    evsel__init_augmented_syscall_tp_args(augmented))
                                        goto out;
@@ -5145,5 +5070,8 @@ out_close:
                fclose(trace.output);
 out:
        trace__exit(&trace);
+#ifdef HAVE_BPF_SKEL
+       augmented_raw_syscalls_bpf__destroy(trace.skel);
+#endif
        return err;
 }
index a0f1d8a..4314c91 100755 (executable)
@@ -123,7 +123,7 @@ check () {
 
   shift
 
-  check_2 "tools/$file" "$file" $*
+  check_2 "tools/$file" "$file" "$@"
 }
 
 beauty_check () {
@@ -131,7 +131,7 @@ beauty_check () {
 
   shift
 
-  check_2 "tools/perf/trace/beauty/$file" "$file" $*
+  check_2 "tools/perf/trace/beauty/$file" "$file" "$@"
 }
 
 # Check if we have the kernel headers (tools/perf/../../include), else
@@ -183,7 +183,7 @@ done
 check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
 check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
 
-cd tools/perf
+cd tools/perf || exit
 
 if [ ${#FAILURES[@]} -gt 0 ]
 then
index b1f51ef..72f263d 100644 (file)
@@ -254,6 +254,30 @@ static int check_addr_al(void *ctx)
        return 0;
 }
 
+/*
+ * Resolve sample->ip with resolve_address() and check that the result agrees
+ * with what resolve_ip() reports: same symbol name, mapped address, symbol
+ * start/end and dso name.
+ *
+ * Returns 0 when everything matches, -1 (via test_fail()/CHECK()) otherwise.
+ *
+ * NOTE(review): al_cleanup() is not called on address_al here - confirm that
+ * is intentional for the API version this test targets.
+ */
+static int check_address_al(void *ctx, const struct perf_dlfilter_sample *sample)
+{
+       struct perf_dlfilter_al address_al;
+       const struct perf_dlfilter_al *al;
+
+       al = perf_dlfilter_fns.resolve_ip(ctx);
+       if (!al)
+               return test_fail("resolve_ip() failed");
+
+       /* The size member is the only one set before calling resolve_address() */
+       address_al.size = sizeof(address_al);
+       if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &address_al))
+               return test_fail("resolve_address() failed");
+
+       /* Both names must be non-NULL before they are handed to strcmp() */
+       CHECK(address_al.sym && al->sym);
+       CHECK(!strcmp(address_al.sym, al->sym));
+       CHECK(address_al.addr == al->addr);
+       CHECK(address_al.sym_start == al->sym_start);
+       CHECK(address_al.sym_end == al->sym_end);
+       CHECK(address_al.dso && al->dso);
+       CHECK(!strcmp(address_al.dso, al->dso));
+
+       return 0;
+}
+
 static int check_attr(void *ctx)
 {
        struct perf_event_attr *attr = perf_dlfilter_fns.attr(ctx);
@@ -290,7 +314,7 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void
        if (early && !d->do_early)
                return 0;
 
-       if (check_al(ctx) || check_addr_al(ctx))
+       if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample))
                return -1;
 
        if (early)
diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c
new file mode 100644 (file)
index 0000000..38e593d
--- /dev/null
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test v2 API for perf --dlfilter shared object
+ * Copyright (c) 2023, Intel Corporation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+/*
+ * Copy v2 API instead of including current API
+ */
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+/*
+ * The following macro can be used to determine if this header defines
+ * perf_dlfilter_sample machine_pid and vcpu.
+ */
+#define PERF_DLFILTER_HAS_MACHINE_PID
+
+/*
+ * Definitions for perf_dlfilter_sample flags.
+ * Each constant is a single, distinct bit (bit 0 through bit 12).
+ */
+enum {
+       PERF_DLFILTER_FLAG_BRANCH       = 1ULL << 0,
+       PERF_DLFILTER_FLAG_CALL         = 1ULL << 1,
+       PERF_DLFILTER_FLAG_RETURN       = 1ULL << 2,
+       PERF_DLFILTER_FLAG_CONDITIONAL  = 1ULL << 3,
+       PERF_DLFILTER_FLAG_SYSCALLRET   = 1ULL << 4,
+       PERF_DLFILTER_FLAG_ASYNC        = 1ULL << 5,
+       PERF_DLFILTER_FLAG_INTERRUPT    = 1ULL << 6,
+       PERF_DLFILTER_FLAG_TX_ABORT     = 1ULL << 7,
+       PERF_DLFILTER_FLAG_TRACE_BEGIN  = 1ULL << 8,
+       PERF_DLFILTER_FLAG_TRACE_END    = 1ULL << 9,
+       PERF_DLFILTER_FLAG_IN_TX        = 1ULL << 10,
+       PERF_DLFILTER_FLAG_VMENTRY      = 1ULL << 11,
+       PERF_DLFILTER_FLAG_VMEXIT       = 1ULL << 12,
+};
+
+/*
+ * perf sample event information (as per perf script and <linux/perf_event.h>)
+ *
+ * This is a local copy of the v2 API layout (the current API header is
+ * deliberately not included), so the member order and types must not be
+ * changed here.
+ */
+struct perf_dlfilter_sample {
+       __u32 size; /* Size of this structure (for compatibility checking) */
+       __u16 ins_lat;          /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+       __u16 p_stage_cyc;      /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+       __u64 ip;
+       __s32 pid;
+       __s32 tid;
+       __u64 time;
+       __u64 addr;
+       __u64 id;
+       __u64 stream_id;
+       __u64 period;
+       __u64 weight;           /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
+       __u64 transaction;      /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */
+       __u64 insn_cnt; /* For instructions-per-cycle (IPC) */
+       __u64 cyc_cnt;          /* For instructions-per-cycle (IPC) */
+       __s32 cpu;
+       __u32 flags;            /* Refer PERF_DLFILTER_FLAG_* above */
+       __u64 data_src;         /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */
+       __u64 phys_addr;        /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */
+       __u64 data_page_size;   /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */
+       __u64 code_page_size;   /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */
+       __u64 cgroup;           /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */
+       __u8  cpumode;          /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */
+       __u8  addr_correlates_sym; /* True => resolve_addr() can be called */
+       __u16 misc;             /* Refer perf_event_header in <linux/perf_event.h> */
+       __u32 raw_size;         /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+       const void *raw_data;   /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
+       __u64 brstack_nr;       /* Number of brstack entries */
+       const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */
+       __u64 raw_callchain_nr; /* Number of raw_callchain entries */
+       const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
+       const char *event;
+       /* The two members below are the ones PERF_DLFILTER_HAS_MACHINE_PID advertises */
+       __s32 machine_pid;
+       __s32 vcpu;
+};
+
+/*
+ * Address location (as per perf script)
+ *
+ * Returned by resolve_ip() and resolve_addr(), or filled in by
+ * resolve_address() into a caller-provided instance whose 'size' member has
+ * been set beforehand.
+ */
+struct perf_dlfilter_al {
+       __u32 size; /* Size of this structure (for compatibility checking) */
+       __u32 symoff;
+       const char *sym;
+       __u64 addr; /* Mapped address (from dso) */
+       __u64 sym_start;
+       __u64 sym_end;
+       const char *dso;
+       __u8  sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */
+       __u8  is_64_bit; /* Only valid if dso is not NULL */
+       __u8  is_kernel_ip; /* True if in kernel space */
+       __u32 buildid_size;
+       __u8 *buildid;
+       /* Below members are only populated by resolve_ip() */
+       __u8 filtered; /* True if this sample event will be filtered out */
+       const char *comm;
+       void *priv; /* Private data (v2 API) */
+};
+
+/*
+ * Table of function pointers through which this filter calls the dlfilter
+ * API. The trailing 'reserved' array pads the table with unused slots.
+ * NOTE(review): presumably the table is populated by perf when the shared
+ * object is loaded - confirm against the dlfilter documentation.
+ */
+struct perf_dlfilter_fns {
+       /* Return information about ip */
+       const struct perf_dlfilter_al *(*resolve_ip)(void *ctx);
+       /* Return information about addr (if addr_correlates_sym) */
+       const struct perf_dlfilter_al *(*resolve_addr)(void *ctx);
+       /* Return arguments from --dlarg option */
+       char **(*args)(void *ctx, int *dlargc);
+       /*
+        * Return information about address (al->size must be set before
+        * calling). Returns 0 on success, -1 otherwise. Call al_cleanup()
+        * when 'al' data is no longer needed.
+        */
+       __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
+       /* Return instruction bytes and length */
+       const __u8 *(*insn)(void *ctx, __u32 *length);
+       /* Return source file name and line number */
+       const char *(*srcline)(void *ctx, __u32 *line_number);
+       /* Return perf_event_attr, refer <linux/perf_event.h> */
+       struct perf_event_attr *(*attr)(void *ctx);
+       /* Read object code, return numbers of bytes read */
+       __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
+       /*
+        * If present (i.e. must check al_cleanup != NULL), call after
+        * resolve_address() to free any associated resources. (v2 API)
+        */
+       void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
+       /* Reserved */
+       void *(*reserved[119])(void *);
+};
+
+/* Function table instance used by the checks in this file to call the API */
+struct perf_dlfilter_fns perf_dlfilter_fns;
+
+/* Debug verbosity: pr_debug() only prints while this is greater than zero */
+static int verbose;
+
+/* printf-style debug output to stderr, emitted only when verbose > 0 */
+#define pr_debug(fmt, ...) do { \
+               if (verbose > 0) \
+                       fprintf(stderr, fmt, ##__VA_ARGS__); \
+       } while (0)
+
+/*
+ * Report a test failure: write @msg and a newline to stderr when verbose
+ * output is enabled, then hand back -1 so callers can simply
+ * 'return test_fail(...)'.
+ */
+static int test_fail(const char *msg)
+{
+       if (verbose > 0)
+               fprintf(stderr, "%s\n", msg);
+
+       return -1;
+}
+
+/*
+ * Make the enclosing function return test_fail()'s result (-1) when the
+ * condition 'x' is false. The message carries no trailing newline because
+ * test_fail() already appends one; adding another printed a blank line after
+ * every failed check.
+ */
+#define CHECK(x) do { \
+               if (!(x)) \
+                       return test_fail("Check '" #x "' failed"); \
+       } while (0)
+
+/* Per-run state: allocated in start(), checked by the filter callbacks, freed in stop() */
+struct filter_data {
+       __u64 ip; /* Expected sample ip, parsed from dlargv[2] */
+       __u64 addr; /* Expected sample addr, parsed from dlargv[3] */
+       int do_early; /* Early-filter mode, parsed from dlargv[4] (0, non-zero or 2, see do_checks()) */
+       int early_filter_cnt; /* Number of filter_event_early() calls checked so far */
+       int filter_cnt; /* Number of filter_event() calls checked so far */
+};
+
+/* The single filter_data instance; non-NULL between start() and stop() */
+static struct filter_data *filt_dat;
+
+/*
+ * dlfilter 'start' entry point: allocate the filter state, publish it through
+ * @data and parse the six --dlarg arguments, which must be:
+ *
+ *   "first" <verbose> <ip> <addr> <do_early> "last"
+ *
+ * Returns 0 on success, or -1 if called more than once, if the allocation
+ * fails or if the arguments are not as expected.
+ */
+int start(void **data, void *ctx)
+{
+       int dlargc;
+       char **dlargv;
+       struct filter_data *d;
+       static bool called;
+
+       /* Report early failures until the requested verbosity has been parsed */
+       verbose = 1;
+
+       CHECK(!filt_dat && !called);
+       called = true;
+
+       d = calloc(1, sizeof(*d));
+       /* Must bail out here: 'd' is dereferenced unconditionally below */
+       if (!d)
+               return test_fail("Failed to allocate memory");
+       filt_dat = d;
+       *data = d;
+
+       dlargv = perf_dlfilter_fns.args(ctx, &dlargc);
+
+       CHECK(dlargc == 6);
+       CHECK(!strcmp(dlargv[0], "first"));
+       verbose = strtol(dlargv[1], NULL, 0);
+       d->ip = strtoull(dlargv[2], NULL, 0);
+       d->addr = strtoull(dlargv[3], NULL, 0);
+       d->do_early = strtol(dlargv[4], NULL, 0);
+       CHECK(!strcmp(dlargv[5], "last"));
+
+       pr_debug("%s API\n", __func__);
+
+       return 0;
+}
+
+#define CHECK_SAMPLE(x) do { \
+               if (sample->x != expected.x) \
+                       return test_fail("'" #x "' not expected value\n"); \
+       } while (0)
+
+/*
+ * Check that @sample carries the values this test expects: fixed constants
+ * for most members, with ip and addr taken from @d.
+ *
+ * Members not named in the 'expected' initializer are zero-initialized, so
+ * brstack_nr and raw_callchain_nr are expected to be 0.
+ *
+ * Returns 0 when everything matches, -1 on the first mismatch.
+ */
+static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample *sample)
+{
+       struct perf_dlfilter_sample expected = {
+               .ip             = d->ip,
+               .pid            = 12345,
+               .tid            = 12346,
+               .time           = 1234567890,
+               .addr           = d->addr,
+               .id             = 99,
+               .stream_id      = 101,
+               .period         = 543212345,
+               .cpu            = 31,
+               .cpumode        = PERF_RECORD_MISC_USER,
+               .addr_correlates_sym = 1,
+               .misc           = PERF_RECORD_MISC_USER,
+       };
+
+       /* The sample handed in must be at least as large as this file's copy of the structure */
+       CHECK(sample->size >= sizeof(struct perf_dlfilter_sample));
+
+       CHECK_SAMPLE(ip);
+       CHECK_SAMPLE(pid);
+       CHECK_SAMPLE(tid);
+       CHECK_SAMPLE(time);
+       CHECK_SAMPLE(addr);
+       CHECK_SAMPLE(id);
+       CHECK_SAMPLE(stream_id);
+       CHECK_SAMPLE(period);
+       CHECK_SAMPLE(cpu);
+       CHECK_SAMPLE(cpumode);
+       CHECK_SAMPLE(addr_correlates_sym);
+       CHECK_SAMPLE(misc);
+
+       /* No raw data, branch stack or raw callchain is expected */
+       CHECK(!sample->raw_data);
+       CHECK_SAMPLE(brstack_nr);
+       CHECK(!sample->brstack);
+       CHECK_SAMPLE(raw_callchain_nr);
+       CHECK(!sample->raw_callchain);
+
+       /* Only the prefix of the event name is compared */
+#define EVENT_NAME "branches:"
+       CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME)));
+
+       return 0;
+}
+
+/*
+ * Check that resolve_ip() succeeds and reports symbol "foo" with a zero
+ * symbol offset. Returns 0 on success, -1 otherwise.
+ */
+static int check_al(void *ctx)
+{
+       const struct perf_dlfilter_al *al = perf_dlfilter_fns.resolve_ip(ctx);
+
+       if (al == NULL)
+               return test_fail("resolve_ip() failed");
+
+       CHECK(al->sym && !strcmp("foo", al->sym));
+       CHECK(!al->symoff);
+
+       return 0;
+}
+
+/*
+ * Check that resolve_addr() succeeds and reports symbol "bar" with a zero
+ * symbol offset. Returns 0 on success, -1 otherwise.
+ */
+static int check_addr_al(void *ctx)
+{
+       const struct perf_dlfilter_al *addr_al = perf_dlfilter_fns.resolve_addr(ctx);
+
+       if (addr_al == NULL)
+               return test_fail("resolve_addr() failed");
+
+       CHECK(addr_al->sym && !strcmp("bar", addr_al->sym));
+       CHECK(!addr_al->symoff);
+
+       return 0;
+}
+
+/*
+ * Compare the location filled in by resolve_address() (@address_al) with the
+ * one returned by resolve_ip() (@al): symbol name, mapped address, symbol
+ * start/end and dso name must all agree. Returns 0 on a match, -1 otherwise.
+ */
+static int compare_address_al(const struct perf_dlfilter_al *address_al,
+                             const struct perf_dlfilter_al *al)
+{
+       /* Names must be non-NULL before they are handed to strcmp() */
+       CHECK(address_al->sym && al->sym);
+       CHECK(!strcmp(address_al->sym, al->sym));
+       CHECK(address_al->addr == al->addr);
+       CHECK(address_al->sym_start == al->sym_start);
+       CHECK(address_al->sym_end == al->sym_end);
+       CHECK(address_al->dso && al->dso);
+       CHECK(!strcmp(address_al->dso, al->dso));
+
+       return 0;
+}
+
+/*
+ * Resolve sample->ip with resolve_address() and check that the result agrees
+ * with what resolve_ip() reports.
+ *
+ * Once resolve_address() has succeeded, al_cleanup() is called (when the
+ * function is provided) whether or not the comparison passes, so that a
+ * failed check does not leak the resources tied to address_al.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+static int check_address_al(void *ctx, const struct perf_dlfilter_sample *sample)
+{
+       struct perf_dlfilter_al address_al;
+       const struct perf_dlfilter_al *al;
+       int err;
+
+       al = perf_dlfilter_fns.resolve_ip(ctx);
+       if (!al)
+               return test_fail("resolve_ip() failed");
+
+       address_al.size = sizeof(address_al);
+       if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &address_al))
+               return test_fail("resolve_address() failed");
+
+       err = compare_address_al(&address_al, al);
+
+       /* al_cleanup() is v2 API so may not be present */
+       if (perf_dlfilter_fns.al_cleanup)
+               perf_dlfilter_fns.al_cleanup(ctx, &address_al);
+
+       return err;
+}
+
+/*
+ * Check that attr() returns an event attribute describing the hardware
+ * branch-instructions event. Returns 0 on success, -1 otherwise.
+ */
+static int check_attr(void *ctx)
+{
+       struct perf_event_attr *attr;
+
+       attr = perf_dlfilter_fns.attr(ctx);
+
+       CHECK(attr);
+       CHECK(attr->type == PERF_TYPE_HARDWARE);
+       CHECK(attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+       return 0;
+}
+
+/*
+ * Common body of filter_event_early() (@early true) and filter_event()
+ * (@early false).
+ *
+ * Each callback is expected exactly once, the early one first, and the
+ * non-early one never when do_early is 2. The sample and the event attribute
+ * are always checked; the address-location checks are skipped for an early
+ * call when do_early is 0.
+ *
+ * Returns -1 on any failed check. Otherwise an early call returns 0 when
+ * do_early is 0, else 1 if do_early is 2 and 0 if not; a non-early call
+ * returns 1.
+ * NOTE(review): presumably a non-zero return asks perf to filter the sample
+ * out - confirm against the dlfilter API documentation.
+ */
+static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early)
+{
+       struct filter_data *d = data;
+
+       CHECK(data && filt_dat == data);
+
+       if (!early) {
+               CHECK(!d->filter_cnt);
+               CHECK(d->early_filter_cnt);
+               CHECK(d->do_early != 2);
+               d->filter_cnt++;
+       } else {
+               CHECK(!d->early_filter_cnt);
+               d->early_filter_cnt++;
+       }
+
+       if (check_sample(data, sample) || check_attr(ctx))
+               return -1;
+
+       if (early && !d->do_early)
+               return 0;
+
+       if (check_al(ctx))
+               return -1;
+
+       if (check_addr_al(ctx))
+               return -1;
+
+       if (check_address_al(ctx, sample))
+               return -1;
+
+       return early ? d->do_early == 2 : 1;
+}
+
+/* dlfilter 'filter_event_early' entry point: log the call, then run the checks in early mode */
+int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
+{
+       const bool early = true;
+
+       pr_debug("%s API\n", __func__);
+
+       return do_checks(data, sample, ctx, early);
+}
+
+/* dlfilter 'filter_event' entry point: log the call, then run the checks in non-early mode */
+int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
+{
+       const bool early = false;
+
+       pr_debug("%s API\n", __func__);
+
+       return do_checks(data, sample, ctx, early);
+}
+
+/*
+ * dlfilter 'stop' entry point: verify that @data is the state handed out by
+ * start() and that stop() has not run before, then release that state.
+ *
+ * Returns 0 on success, -1 if the checks fail. @ctx is not used.
+ */
+int stop(void *data, void *ctx)
+{
+       static bool called;
+
+       pr_debug("%s API\n", __func__);
+
+       CHECK(data && filt_dat == data && !called);
+       called = true;
+
+       filt_dat = NULL;
+       free(data);
+
+       return 0;
+}
+
+/*
+ * dlfilter 'filter_description' entry point: store the long description
+ * through @long_description and return the short one.
+ */
+const char *filter_description(const char **long_description)
+{
+       const char *short_description = "dlfilter to test v2 C API";
+
+       *long_description = "Filter used by the 'dlfilter C API' perf test";
+
+       return short_description;
+}
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
deleted file mode 100644 (file)
index 3bd7fc1..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
-    Description:
-
-    . Disable strace like syscall tracing (--no-syscalls), or try tracing
-      just some (-e *sleep).
-
-    . Attach a filter function to a kernel function, returning when it should
-      be considered, i.e. appear on the output.
-
-    . Run it system wide, so that any sleep of >= 5 seconds and < than 6
-      seconds gets caught.
-
-    . Ask for callgraphs using DWARF info, so that userspace can be unwound
-
-    . While this is running, run something like "sleep 5s".
-
-    . If we decide to add tv_nsec as well, then it becomes:
-
-      int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec)
-
-      I.e. add where it comes from (rqtp->tv_nsec) and where it will be
-      accessible in the function body (nsec)
-
-    # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/
-         0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5
-                                           hrtimer_nanosleep ([kernel.kallsyms])
-                                           __x64_sys_nanosleep ([kernel.kallsyms])
-                                           do_syscall_64 ([kernel.kallsyms])
-                                           entry_SYSCALL_64 ([kernel.kallsyms])
-                                           __GI___nanosleep (/usr/lib64/libc-2.26.so)
-                                           rpl_nanosleep (/usr/bin/sleep)
-                                           xnanosleep (/usr/bin/sleep)
-                                           main (/usr/bin/sleep)
-                                           __libc_start_main (/usr/lib64/libc-2.26.so)
-                                           _start (/usr/bin/sleep)
-    ^C#
-
-   Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
-*/
-
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-#define NSEC_PER_SEC   1000000000L
-
-SEC("hrtimer_nanosleep=hrtimer_nanosleep rqtp")
-int hrtimer_nanosleep(void *ctx, int err, long long sec)
-{
-       return sec / NSEC_PER_SEC == 5ULL;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
deleted file mode 100644 (file)
index 3e296c0..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-struct syscall_enter_args;
-
-SEC("raw_syscalls:sys_enter")
-int sys_enter(struct syscall_enter_args *args)
-{
-       return 0;
-}
-char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c
deleted file mode 100644 (file)
index e9080b0..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-struct __bpf_stdout__ {
-       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
-       __type(key, int);
-       __type(value, __u32);
-       __uint(max_entries, __NR_CPUS__);
-} __bpf_stdout__ SEC(".maps");
-
-#define puts(from) \
-       ({ const int __len = sizeof(from); \
-          char __from[sizeof(from)] = from;                    \
-          bpf_perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \
-                         &__from, __len & (sizeof(from) - 1)); })
-
-struct syscall_enter_args;
-
-SEC("raw_syscalls:sys_enter")
-int sys_enter(struct syscall_enter_args *args)
-{
-       puts("Hello, world\n");
-       return 0;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c
deleted file mode 100644 (file)
index c4481c3..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hook into 'openat' syscall entry tracepoint
- *
- * Test it with:
- *
- * perf trace -e tools/perf/examples/bpf/sys_enter_openat.c cat /etc/passwd > /dev/null
- *
- * It'll catch some openat syscalls related to the dynamic linked and
- * the last one should be the one for '/etc/passwd'.
- *
- * The syscall_enter_openat_args can be used to get the syscall fields
- * and use them for filtering calls, i.e. use in expressions for
- * the return value.
- */
-
-#include <bpf/bpf.h>
-
-struct syscall_enter_openat_args {
-       unsigned long long unused;
-       long               syscall_nr;
-       long               dfd;
-       char               *filename_ptr;
-       long               flags;
-       long               mode;
-};
-
-int syscall_enter(openat)(struct syscall_enter_openat_args *args)
-{
-       return 1;
-}
-
-license(GPL);
index a26e2f1..16fc456 100644 (file)
@@ -91,6 +91,7 @@ struct perf_dlfilter_al {
        /* Below members are only populated by resolve_ip() */
        __u8 filtered; /* True if this sample event will be filtered out */
        const char *comm;
+       void *priv; /* Private data. Do not change */
 };
 
 struct perf_dlfilter_fns {
@@ -102,7 +103,8 @@ struct perf_dlfilter_fns {
        char **(*args)(void *ctx, int *dlargc);
        /*
         * Return information about address (al->size must be set before
-        * calling). Returns 0 on success, -1 otherwise.
+        * calling). Returns 0 on success, -1 otherwise. Call al_cleanup()
+        * when 'al' data is no longer needed.
         */
        __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al);
        /* Return instruction bytes and length */
@@ -113,8 +115,13 @@ struct perf_dlfilter_fns {
        struct perf_event_attr *(*attr)(void *ctx);
        /* Read object code, return numbers of bytes read */
        __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len);
+       /*
+        * If present (i.e. must check al_cleanup != NULL), call after
+        * resolve_address() to free any associated resources.
+        */
+       void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al);
        /* Reserved */
-       void *(*reserved[120])(void *);
+       void *(*reserved[119])(void *);
 };
 
 /*
index 38cae47..d3fc809 100644 (file)
@@ -18,7 +18,6 @@
 #include <subcmd/run-command.h>
 #include "util/parse-events.h"
 #include <subcmd/parse-options.h>
-#include "util/bpf-loader.h"
 #include "util/debug.h"
 #include "util/event.h"
 #include "util/util.h" // usage()
@@ -324,7 +323,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
        perf_config__exit();
        exit_browser(status);
        perf_env__exit(&perf_env);
-       bpf__clear();
 
        if (status)
                return status & 0xff;
index 150765f..1d18bb8 100644 (file)
@@ -35,3 +35,9 @@ $(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_L
        $(call rule_mkdir)
        $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
 endif
+
+# pmu-events.c file is generated in the OUTPUT directory so it needs a
+# separate rule to depend on it properly
+$(OUTPUT)pmu-events/pmu-events.o: $(PMU_EVENTS_C)
+       $(call rule_mkdir)
+       $(call if_changed_dep,cc_o_c)
index fc06330..7a2b7b2 100644 (file)
@@ -93,9 +93,6 @@
         "ArchStdEvent": "L1D_CACHE_LMISS_RD"
     },
     {
-        "ArchStdEvent": "L1D_CACHE_LMISS"
-    },
-    {
         "ArchStdEvent": "L1I_CACHE_LMISS"
     },
     {
index 95c3024..88b23b8 100644 (file)
         "BriefDescription": "L2D OTB allocate"
     },
     {
-        "PublicDescription": "DTLB Translation cache hit on S1L2 walk cache entry",
-        "EventCode": "0xD801",
-        "EventName": "MMU_D_TRANS_CACHE_HIT_S1L2_WALK",
-        "BriefDescription": "DTLB Translation cache hit on S1L2 walk cache entry"
-    },
-    {
-        "PublicDescription": "DTLB Translation cache hit on S1L1 walk cache entry",
-        "EventCode": "0xD802",
-        "EventName": "MMU_D_TRANS_CACHE_HIT_S1L1_WALK",
-        "BriefDescription": "DTLB Translation cache hit on S1L1 walk cache entry"
-    },
-    {
-        "PublicDescription": "DTLB Translation cache hit on S1L0 walk cache entry",
-        "EventCode": "0xD803",
-        "EventName": "MMU_D_TRANS_CACHE_HIT_S1L0_WALK",
-        "BriefDescription": "DTLB Translation cache hit on S1L0 walk cache entry"
-    },
-    {
-        "PublicDescription": "DTLB Translation cache hit on S2L2 walk cache entry",
-        "EventCode": "0xD804",
-        "EventName": "MMU_D_TRANS_CACHE_HIT_S2L2_WALK",
-        "BriefDescription": "DTLB Translation cache hit on S2L2 walk cache entry"
-    },
-    {
-        "PublicDescription": "DTLB Translation cache hit on S2L1 walk cache entry",
-        "EventCode": "0xD805",
-        "EventName": "MMU_D_TRANS_CACHE_HIT_S2L1_WALK",
-        "BriefDescription": "DTLB Translation cache hit on S2L1 walk cache entry"
-    },
-    {
-        "PublicDescription": "DTLB Translation cache hit on S2L0 walk cache entry",
-        "EventCode": "0xD806",
-        "EventName": "MMU_D_TRANS_CACHE_HIT_S2L0_WALK",
-        "BriefDescription": "DTLB Translation cache hit on S2L0 walk cache entry"
-    },
-    {
-        "PublicDescription": "D-side S1 Page walk cache lookup",
-        "EventCode": "0xD807",
-        "EventName": "MMU_D_S1_WALK_CACHE_LOOKUP",
-        "BriefDescription": "D-side S1 Page walk cache lookup"
-    },
-    {
-        "PublicDescription": "D-side S1 Page walk cache refill",
-        "EventCode": "0xD808",
-        "EventName": "MMU_D_S1_WALK_CACHE_REFILL",
-        "BriefDescription": "D-side S1 Page walk cache refill"
-    },
-    {
-        "PublicDescription": "D-side S2 Page walk cache lookup",
-        "EventCode": "0xD809",
-        "EventName": "MMU_D_S2_WALK_CACHE_LOOKUP",
-        "BriefDescription": "D-side S2 Page walk cache lookup"
-    },
-    {
-        "PublicDescription": "D-side S2 Page walk cache refill",
-        "EventCode": "0xD80A",
-        "EventName": "MMU_D_S2_WALK_CACHE_REFILL",
-        "BriefDescription": "D-side S2 Page walk cache refill"
-    },
-    {
         "PublicDescription": "D-side Stage1 tablewalk fault",
         "EventCode": "0xD80B",
         "EventName": "MMU_D_S1_WALK_FAULT",
         "BriefDescription": "L2I OTB allocate"
     },
     {
-        "PublicDescription": "ITLB Translation cache hit on S1L2 walk cache entry",
-        "EventCode": "0xD901",
-        "EventName": "MMU_I_TRANS_CACHE_HIT_S1L2_WALK",
-        "BriefDescription": "ITLB Translation cache hit on S1L2 walk cache entry"
-    },
-    {
-        "PublicDescription": "ITLB Translation cache hit on S1L1 walk cache entry",
-        "EventCode": "0xD902",
-        "EventName": "MMU_I_TRANS_CACHE_HIT_S1L1_WALK",
-        "BriefDescription": "ITLB Translation cache hit on S1L1 walk cache entry"
-    },
-    {
-        "PublicDescription": "ITLB Translation cache hit on S1L0 walk cache entry",
-        "EventCode": "0xD903",
-        "EventName": "MMU_I_TRANS_CACHE_HIT_S1L0_WALK",
-        "BriefDescription": "ITLB Translation cache hit on S1L0 walk cache entry"
-    },
-    {
-        "PublicDescription": "ITLB Translation cache hit on S2L2 walk cache entry",
-        "EventCode": "0xD904",
-        "EventName": "MMU_I_TRANS_CACHE_HIT_S2L2_WALK",
-        "BriefDescription": "ITLB Translation cache hit on S2L2 walk cache entry"
-    },
-    {
-        "PublicDescription": "ITLB Translation cache hit on S2L1 walk cache entry",
-        "EventCode": "0xD905",
-        "EventName": "MMU_I_TRANS_CACHE_HIT_S2L1_WALK",
-        "BriefDescription": "ITLB Translation cache hit on S2L1 walk cache entry"
-    },
-    {
-        "PublicDescription": "ITLB Translation cache hit on S2L0 walk cache entry",
-        "EventCode": "0xD906",
-        "EventName": "MMU_I_TRANS_CACHE_HIT_S2L0_WALK",
-        "BriefDescription": "ITLB Translation cache hit on S2L0 walk cache entry"
-    },
-    {
-        "PublicDescription": "I-side S1 Page walk cache lookup",
-        "EventCode": "0xD907",
-        "EventName": "MMU_I_S1_WALK_CACHE_LOOKUP",
-        "BriefDescription": "I-side S1 Page walk cache lookup"
-    },
-    {
-        "PublicDescription": "I-side S1 Page walk cache refill",
-        "EventCode": "0xD908",
-        "EventName": "MMU_I_S1_WALK_CACHE_REFILL",
-        "BriefDescription": "I-side S1 Page walk cache refill"
-    },
-    {
-        "PublicDescription": "I-side S2 Page walk cache lookup",
-        "EventCode": "0xD909",
-        "EventName": "MMU_I_S2_WALK_CACHE_LOOKUP",
-        "BriefDescription": "I-side S2 Page walk cache lookup"
-    },
-    {
-        "PublicDescription": "I-side S2 Page walk cache refill",
-        "EventCode": "0xD90A",
-        "EventName": "MMU_I_S2_WALK_CACHE_REFILL",
-        "BriefDescription": "I-side S2 Page walk cache refill"
-    },
-    {
         "PublicDescription": "I-side Stage1 tablewalk fault",
         "EventCode": "0xD90B",
         "EventName": "MMU_I_S1_WALK_FAULT",
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
new file mode 100644 (file)
index 0000000..1e7e890
--- /dev/null
@@ -0,0 +1,362 @@
+[
+    {
+       "MetricExpr": "BR_MIS_PRED / BR_PRED",
+       "BriefDescription": "Branch predictor misprediction rate. May not count branches that are never resolved because they are in the misprediction shadow of an earlier branch",
+       "MetricGroup": "Branch Prediction",
+       "MetricName": "Misprediction"
+    },
+    {
+       "MetricExpr": "BR_MIS_PRED_RETIRED / BR_RETIRED",
+       "BriefDescription": "Branch predictor misprediction rate",
+       "MetricGroup": "Branch Prediction",
+       "MetricName": "Misprediction (retired)"
+    },
+    {
+       "MetricExpr": "BUS_ACCESS / ( BUS_CYCLES * 1)",
+       "BriefDescription": "Core-to-uncore bus utilization",
+       "MetricGroup": "Bus",
+       "MetricName": "Bus utilization"
+    },
+    {
+       "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE",
+       "BriefDescription": "L1D cache miss rate",
+       "MetricGroup": "Cache",
+       "MetricName": "L1D cache miss"
+    },
+    {
+       "MetricExpr": "L1D_CACHE_LMISS_RD / L1D_CACHE_RD",
+       "BriefDescription": "L1D cache read miss rate",
+       "MetricGroup": "Cache",
+       "MetricName": "L1D cache read miss"
+    },
+    {
+       "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE",
+       "BriefDescription": "L1I cache miss rate",
+       "MetricGroup": "Cache",
+       "MetricName": "L1I cache miss"
+    },
+    {
+       "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE",
+       "BriefDescription": "L2 cache miss rate",
+       "MetricGroup": "Cache",
+       "MetricName": "L2 cache miss"
+    },
+    {
+       "MetricExpr": "L1I_CACHE_LMISS / L1I_CACHE",
+       "BriefDescription": "L1I cache read miss rate",
+       "MetricGroup": "Cache",
+       "MetricName": "L1I cache read miss"
+    },
+    {
+       "MetricExpr": "L2D_CACHE_LMISS_RD / L2D_CACHE_RD",
+       "BriefDescription": "L2 cache read miss rate",
+       "MetricGroup": "Cache",
+       "MetricName": "L2 cache read miss"
+    },
+    {
+       "MetricExpr": "(L1D_CACHE_LMISS_RD * 1000) / INST_RETIRED",
+       "BriefDescription": "Misses per thousand instructions (data)",
+       "MetricGroup": "Cache",
+       "MetricName": "MPKI data"
+    },
+    {
+       "MetricExpr": "(L1I_CACHE_LMISS * 1000) / INST_RETIRED",
+       "BriefDescription": "Misses per thousand instructions (instruction)",
+       "MetricGroup": "Cache",
+       "MetricName": "MPKI instruction"
+    },
+    {
+       "MetricExpr": "ASE_SPEC / OP_SPEC",
+       "BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC)",
+       "MetricGroup": "Instruction",
+       "MetricName": "ASE mix"
+    },
+    {
+       "MetricExpr": "CRYPTO_SPEC / OP_SPEC",
+       "BriefDescription": "Proportion of crypto data processing operations",
+       "MetricGroup": "Instruction",
+       "MetricName": "Crypto mix"
+    },
+    {
+       "MetricExpr": "VFP_SPEC / (duration_time *1000000000)",
+       "BriefDescription": "Giga-floating point operations per second",
+       "MetricGroup": "Instruction",
+       "MetricName": "GFLOPS_ISSUED"
+    },
+    {
+       "MetricExpr": "DP_SPEC / OP_SPEC",
+       "BriefDescription": "Proportion of integer data processing operations",
+       "MetricGroup": "Instruction",
+       "MetricName": "Integer mix"
+    },
+    {
+       "MetricExpr": "INST_RETIRED / CPU_CYCLES",
+       "BriefDescription": "Instructions per cycle",
+       "MetricGroup": "Instruction",
+       "MetricName": "IPC"
+    },
+    {
+       "MetricExpr": "LD_SPEC / OP_SPEC",
+       "BriefDescription": "Proportion of load operations",
+       "MetricGroup": "Instruction",
+       "MetricName": "Load mix"
+    },
+    {
+       "MetricExpr": "LDST_SPEC/ OP_SPEC",
+       "BriefDescription": "Proportion of load & store operations",
+       "MetricGroup": "Instruction",
+       "MetricName": "Load-store mix"
+    },
+    {
+       "MetricExpr": "INST_RETIRED / (duration_time * 1000000)",
+       "BriefDescription": "Millions of instructions per second",
+       "MetricGroup": "Instruction",
+       "MetricName": "MIPS_RETIRED"
+    },
+    {
+       "MetricExpr": "INST_SPEC / (duration_time * 1000000)",
+       "BriefDescription": "Millions of instructions per second",
+       "MetricGroup": "Instruction",
+       "MetricName": "MIPS_UTILIZATION"
+    },
+    {
+       "MetricExpr": "PC_WRITE_SPEC / OP_SPEC",
+       "BriefDescription": "Proportion of software change of PC operations",
+       "MetricGroup": "Instruction",
+       "MetricName": "PC write mix"
+    },
+    {
+       "MetricExpr": "ST_SPEC / OP_SPEC",
+       "BriefDescription": "Proportion of store operations",
+       "MetricGroup": "Instruction",
+       "MetricName": "Store mix"
+    },
+    {
+       "MetricExpr": "VFP_SPEC / OP_SPEC",
+       "BriefDescription": "Proportion of FP operations",
+       "MetricGroup": "Instruction",
+       "MetricName": "VFP mix"
+    },
+    {
+       "MetricExpr": "1 - (OP_RETIRED/ (CPU_CYCLES * 4))",
+       "BriefDescription": "Proportion of slots lost",
+       "MetricGroup": "Speculation / TDA",
+       "MetricName": "CPU lost"
+    },
+    {
+       "MetricExpr": "OP_RETIRED/ (CPU_CYCLES * 4)",
+       "BriefDescription": "Proportion of slots retiring",
+       "MetricGroup": "Speculation / TDA",
+       "MetricName": "CPU utilization"
+    },
+    {
+       "MetricExpr": "OP_RETIRED - OP_SPEC",
+       "BriefDescription": "Operations lost due to misspeculation",
+       "MetricGroup": "Speculation / TDA",
+       "MetricName": "Operations lost"
+    },
+    {
+       "MetricExpr": "1 - (OP_RETIRED / OP_SPEC)",
+       "BriefDescription": "Proportion of operations lost",
+       "MetricGroup": "Speculation / TDA",
+       "MetricName": "Operations lost (ratio)"
+    },
+    {
+       "MetricExpr": "OP_RETIRED / OP_SPEC",
+       "BriefDescription": "Proportion of operations retired",
+       "MetricGroup": "Speculation / TDA",
+       "MetricName": "Operations retired"
+    },
+    {
+       "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES",
+       "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and cache miss",
+       "MetricGroup": "Stall",
+       "MetricName": "Stall backend cache cycles"
+    },
+    {
+       "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES",
+       "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and resource full",
+       "MetricGroup": "Stall",
+       "MetricName": "Stall backend resource cycles"
+    },
+    {
+       "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES",
+       "BriefDescription": "Proportion of cycles stalled and no operations issued to backend and TLB miss",
+       "MetricGroup": "Stall",
+       "MetricName": "Stall backend tlb cycles"
+    },
+    {
+       "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES",
+       "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss",
+       "MetricGroup": "Stall",
+       "MetricName": "Stall frontend cache cycles"
+    },
+    {
+       "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES",
+       "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss",
+       "MetricGroup": "Stall",
+       "MetricName": "Stall frontend tlb cycles"
+    },
+    {
+       "MetricExpr": "DTLB_WALK / L1D_TLB",
+       "BriefDescription": "D-side walk per d-side translation request",
+       "MetricGroup": "TLB",
+       "MetricName": "DTLB walks"
+    },
+    {
+       "MetricExpr": "ITLB_WALK / L1I_TLB",
+       "BriefDescription": "I-side walk per i-side translation request",
+       "MetricGroup": "TLB",
+       "MetricName": "ITLB walks"
+    },
+    {
+        "MetricExpr": "STALL_SLOT_BACKEND / (CPU_CYCLES * 4)",
+        "BriefDescription": "Fraction of slots backend bound",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "backend"
+    },
+    {
+        "MetricExpr": "1 - (retiring + lost + backend)",
+        "BriefDescription": "Fraction of slots frontend bound",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "frontend"
+    },
+    {
+        "MetricExpr": "((OP_SPEC - OP_RETIRED) / (CPU_CYCLES * 4))",
+        "BriefDescription": "Fraction of slots lost due to misspeculation",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "lost"
+    },
+    {
+        "MetricExpr": "(OP_RETIRED / (CPU_CYCLES * 4))",
+        "BriefDescription": "Fraction of slots retiring, useful work",
+        "MetricGroup": "TopDownL1",
+        "MetricName": "retiring"
+    },
+    {
+        "MetricExpr": "backend - backend_memory",
+        "BriefDescription": "Fraction of slots the CPU was stalled due to backend non-memory subsystem issues",
+        "MetricGroup": "TopDownL2",
+        "MetricName": "backend_core"
+    },
+    {
+        "MetricExpr": "(STALL_BACKEND_TLB + STALL_BACKEND_CACHE + STALL_BACKEND_MEM) / CPU_CYCLES ",
+        "BriefDescription": "Fraction of slots the CPU was stalled due to backend memory subsystem issues (cache/tlb miss)",
+        "MetricGroup": "TopDownL2",
+        "MetricName": "backend_memory"
+    },
+    {
+        "MetricExpr": " (BR_MIS_PRED_RETIRED / GPC_FLUSH) * lost",
+        "BriefDescription": "Fraction of slots lost due to branch misprediction",
+        "MetricGroup": "TopDownL2",
+        "MetricName": "branch_mispredict"
+    },
+    {
+        "MetricExpr": "frontend - frontend_latency",
+        "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)",
+        "MetricGroup": "TopDownL2",
+        "MetricName": "frontend_bandwidth"
+    },
+    {
+        "MetricExpr": "(STALL_FRONTEND - ((STALL_SLOT_FRONTEND - (frontend * CPU_CYCLES * 4)) / 4)) / CPU_CYCLES",
+        "BriefDescription": "Fraction of slots the CPU was stalled due to frontend latency issues (cache/tlb miss); nothing to dispatch",
+        "MetricGroup": "TopDownL2",
+        "MetricName": "frontend_latency"
+    },
+    {
+        "MetricExpr": "lost - branch_mispredict",
+        "BriefDescription": "Fraction of slots lost due to other/non-branch misprediction misspeculation",
+        "MetricGroup": "TopDownL2",
+        "MetricName": "other_clears"
+    },
+    {
+        "MetricExpr": "(IXU_NUM_UOPS_ISSUED + FSU_ISSUED) / (CPU_CYCLES * 6)",
+        "BriefDescription": "Fraction of execute slots utilized",
+        "MetricGroup": "TopDownL2",
+        "MetricName": "pipe_utilization"
+    },
+    {
+        "MetricExpr": "STALL_BACKEND_MEM / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to data L2 cache miss",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "d_cache_l2_miss"
+    },
+    {
+        "MetricExpr": "STALL_BACKEND_CACHE / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to data cache miss",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "d_cache_miss"
+    },
+    {
+        "MetricExpr": "STALL_BACKEND_TLB / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to data TLB miss",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "d_tlb_miss"
+    },
+    {
+        "MetricExpr": "FSU_ISSUED / (CPU_CYCLES * 2)",
+        "BriefDescription": "Fraction of FSU execute slots utilized",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "fsu_pipe_utilization"
+    },
+    {
+        "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction cache miss",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "i_cache_miss"
+    },
+    {
+        "MetricExpr": " STALL_FRONTEND_TLB / CPU_CYCLES ",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to instruction TLB miss",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "i_tlb_miss"
+    },
+    {
+        "MetricExpr": "IXU_NUM_UOPS_ISSUED / (CPU_CYCLES / 4)",
+        "BriefDescription": "Fraction of IXU execute slots utilized",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "ixu_pipe_utilization"
+    },
+    {
+        "MetricExpr": "IDR_STALL_FLUSH / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to flush recovery",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "recovery"
+    },
+    {
+        "MetricExpr": "STALL_BACKEND_RESOURCE / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled due to core resource shortage",
+        "MetricGroup": "TopDownL3",
+        "MetricName": "resource"
+    },
+    {
+        "MetricExpr": "IDR_STALL_FSU_SCHED / CPU_CYCLES ",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and FSU was full",
+        "MetricGroup": "TopDownL4",
+        "MetricName": "stall_fsu_sched"
+    },
+    {
+        "MetricExpr": "IDR_STALL_IXU_SCHED / CPU_CYCLES ",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and IXU was full",
+        "MetricGroup": "TopDownL4",
+        "MetricName": "stall_ixu_sched"
+    },
+    {
+        "MetricExpr": "IDR_STALL_LOB_ID / CPU_CYCLES ",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and LOB was full",
+        "MetricGroup": "TopDownL4",
+        "MetricName": "stall_lob_id"
+    },
+    {
+        "MetricExpr": "IDR_STALL_ROB_ID / CPU_CYCLES",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and ROB was full",
+        "MetricGroup": "TopDownL4",
+        "MetricName": "stall_rob_id"
+    },
+    {
+        "MetricExpr": "IDR_STALL_SOB_ID / CPU_CYCLES ",
+        "BriefDescription": "Fraction of cycles the CPU was stalled and SOB was full",
+        "MetricGroup": "TopDownL4",
+        "MetricName": "stall_sob_id"
+    }
+]
index f9fae15..7110283 100644 (file)
@@ -1,18 +1,24 @@
 [
     {
-        "ArchStdEvent": "STALL_FRONTEND"
+        "ArchStdEvent": "STALL_FRONTEND",
+        "Errata": "Errata AC03_CPU_29",
+        "BriefDescription": "Impacted by errata, use metrics instead -"
     },
     {
         "ArchStdEvent": "STALL_BACKEND"
     },
     {
-        "ArchStdEvent": "STALL"
+        "ArchStdEvent": "STALL",
+        "Errata": "Errata AC03_CPU_29",
+        "BriefDescription": "Impacted by errata, use metrics instead -"
     },
     {
         "ArchStdEvent": "STALL_SLOT_BACKEND"
     },
     {
-        "ArchStdEvent": "STALL_SLOT_FRONTEND"
+        "ArchStdEvent": "STALL_SLOT_FRONTEND",
+        "Errata": "Errata AC03_CPU_29",
+        "BriefDescription": "Impacted by errata, use metrics instead -"
     },
     {
         "ArchStdEvent": "STALL_SLOT"
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json
deleted file mode 100644 (file)
index 79f2016..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-[
-    {
-        "ArchStdEvent": "BR_MIS_PRED"
-    },
-    {
-        "ArchStdEvent": "BR_PRED"
-    }
-]
index 579c1c9..2e11a8c 100644 (file)
@@ -1,20 +1,18 @@
 [
     {
-        "ArchStdEvent": "CPU_CYCLES"
+        "ArchStdEvent": "BUS_ACCESS",
+        "PublicDescription": "Counts memory transactions issued by the CPU to the external bus, including snoop requests and snoop responses. Each beat of data is counted individually."
     },
     {
-        "ArchStdEvent": "BUS_ACCESS"
+        "ArchStdEvent": "BUS_CYCLES",
+        "PublicDescription": "Counts bus cycles in the CPU. Bus cycles represent a clock cycle in which a transaction could be sent or received on the interface from the CPU to the external bus. Since that interface is driven at the same clock speed as the CPU, this event is a duplicate of CPU_CYCLES."
     },
     {
-        "ArchStdEvent": "BUS_CYCLES"
+        "ArchStdEvent": "BUS_ACCESS_RD",
+        "PublicDescription": "Counts memory read transactions seen on the external bus. Each beat of data is counted individually."
     },
     {
-        "ArchStdEvent": "BUS_ACCESS_RD"
-    },
-    {
-        "ArchStdEvent": "BUS_ACCESS_WR"
-    },
-    {
-        "ArchStdEvent": "CNT_CYCLES"
+        "ArchStdEvent": "BUS_ACCESS_WR",
+        "PublicDescription": "Counts memory write transactions seen on the external bus. Each beat of data is counted individually."
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json
deleted file mode 100644 (file)
index 0141f74..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-[
-    {
-        "ArchStdEvent": "L1I_CACHE_REFILL"
-    },
-    {
-        "ArchStdEvent": "L1I_TLB_REFILL"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_REFILL"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE"
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_REFILL"
-    },
-    {
-        "ArchStdEvent": "L1I_CACHE"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_WB"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_REFILL"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_WB"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_ALLOCATE"
-    },
-    {
-        "ArchStdEvent": "L1D_TLB"
-    },
-    {
-        "ArchStdEvent": "L1I_TLB"
-    },
-    {
-        "ArchStdEvent": "L3D_CACHE_ALLOCATE"
-    },
-    {
-        "ArchStdEvent": "L3D_CACHE_REFILL"
-    },
-    {
-        "ArchStdEvent": "L3D_CACHE"
-    },
-    {
-        "ArchStdEvent": "L2D_TLB_REFILL"
-    },
-    {
-        "ArchStdEvent": "L2D_TLB"
-    },
-    {
-        "ArchStdEvent": "DTLB_WALK"
-    },
-    {
-        "ArchStdEvent": "ITLB_WALK"
-    },
-    {
-        "ArchStdEvent": "LL_CACHE_RD"
-    },
-    {
-        "ArchStdEvent": "LL_CACHE_MISS_RD"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_LMISS_RD"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_RD"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_WR"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_REFILL_RD"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_REFILL_WR"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_REFILL_INNER"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_WB_VICTIM"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_WB_CLEAN"
-    },
-    {
-        "ArchStdEvent": "L1D_CACHE_INVAL"
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_REFILL_RD"
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_REFILL_WR"
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_RD"
-    },
-    {
-        "ArchStdEvent": "L1D_TLB_WR"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_RD"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_WR"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_REFILL_RD"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_REFILL_WR"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_WB_VICTIM"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_WB_CLEAN"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_INVAL"
-    },
-    {
-        "ArchStdEvent": "L2D_TLB_REFILL_RD"
-    },
-    {
-        "ArchStdEvent": "L2D_TLB_REFILL_WR"
-    },
-    {
-        "ArchStdEvent": "L2D_TLB_RD"
-    },
-    {
-        "ArchStdEvent": "L2D_TLB_WR"
-    },
-    {
-        "ArchStdEvent": "L3D_CACHE_RD"
-    },
-    {
-        "ArchStdEvent": "L1I_CACHE_LMISS"
-    },
-    {
-        "ArchStdEvent": "L2D_CACHE_LMISS_RD"
-    },
-    {
-        "ArchStdEvent": "L3D_CACHE_LMISS_RD"
-    }
-]
index 344a2d5..4404b8e 100644 (file)
@@ -1,47 +1,62 @@
 [
     {
-        "ArchStdEvent": "EXC_TAKEN"
+        "ArchStdEvent": "EXC_TAKEN",
+        "PublicDescription": "Counts any taken architecturally visible exceptions such as IRQ, FIQ, SError, and other synchronous exceptions. Exceptions are counted whether or not they are taken locally."
     },
     {
-        "ArchStdEvent": "MEMORY_ERROR"
+        "ArchStdEvent": "EXC_RETURN",
+        "PublicDescription": "Counts any architecturally executed exception return instructions. Eg: AArch64: ERET"
     },
     {
-        "ArchStdEvent": "EXC_UNDEF"
+        "ArchStdEvent": "EXC_UNDEF",
+        "PublicDescription": "Counts the number of synchronous exceptions which are taken locally that are due to attempting to execute an instruction that is UNDEFINED. Attempting to execute instruction bit patterns that have not been allocated. Attempting to execute instructions when they are disabled. Attempting to execute instructions at an inappropriate Exception level. Attempting to execute an instruction when the value of PSTATE.IL is 1."
     },
     {
-        "ArchStdEvent": "EXC_SVC"
+        "ArchStdEvent": "EXC_SVC",
+        "PublicDescription": "Counts SVC exceptions taken locally."
     },
     {
-        "ArchStdEvent": "EXC_PABORT"
+        "ArchStdEvent": "EXC_PABORT",
+        "PublicDescription": "Counts synchronous exceptions that are taken locally and caused by Instruction Aborts."
     },
     {
-        "ArchStdEvent": "EXC_DABORT"
+        "ArchStdEvent": "EXC_DABORT",
+        "PublicDescription": "Counts exceptions that are taken locally and are caused by data aborts or SErrors. Conditions that could cause those exceptions are attempting to read or write memory where the MMU generates a fault, attempting to read or write memory with a misaligned address, interrupts from the nSEI inputs and internally generated SErrors."
     },
     {
-        "ArchStdEvent": "EXC_IRQ"
+        "ArchStdEvent": "EXC_IRQ",
+        "PublicDescription": "Counts IRQ exceptions including the virtual IRQs that are taken locally."
     },
     {
-        "ArchStdEvent": "EXC_FIQ"
+        "ArchStdEvent": "EXC_FIQ",
+        "PublicDescription": "Counts FIQ exceptions including the virtual FIQs that are taken locally."
     },
     {
-        "ArchStdEvent": "EXC_SMC"
+        "ArchStdEvent": "EXC_SMC",
+        "PublicDescription": "Counts SMC exceptions taken to EL3."
     },
     {
-        "ArchStdEvent": "EXC_HVC"
+        "ArchStdEvent": "EXC_HVC",
+        "PublicDescription": "Counts HVC exceptions taken to EL2."
     },
     {
-        "ArchStdEvent": "EXC_TRAP_PABORT"
+        "ArchStdEvent": "EXC_TRAP_PABORT",
+        "PublicDescription": "Counts exceptions which are traps not taken locally and are caused by Instruction Aborts. For example, attempting to execute an instruction with a misaligned PC."
     },
     {
-        "ArchStdEvent": "EXC_TRAP_DABORT"
+        "ArchStdEvent": "EXC_TRAP_DABORT",
+        "PublicDescription": "Counts exceptions which are traps not taken locally and are caused by Data Aborts or SError interrupts. Conditions that could cause those exceptions are:\n\n1. Attempting to read or write memory where the MMU generates a fault,\n2. Attempting to read or write memory with a misaligned address,\n3. Interrupts from the SEI input.\n4. internally generated SErrors."
     },
     {
-        "ArchStdEvent": "EXC_TRAP_OTHER"
+        "ArchStdEvent": "EXC_TRAP_OTHER",
+        "PublicDescription": "Counts the number of synchronous trap exceptions which are not taken locally and are not SVC, SMC, HVC, data aborts, Instruction Aborts, or interrupts."
     },
     {
-        "ArchStdEvent": "EXC_TRAP_IRQ"
+        "ArchStdEvent": "EXC_TRAP_IRQ",
+        "PublicDescription": "Counts IRQ exceptions including the virtual IRQs that are not taken locally."
     },
     {
-        "ArchStdEvent": "EXC_TRAP_FIQ"
+        "ArchStdEvent": "EXC_TRAP_FIQ",
+        "PublicDescription": "Counts FIQs which are not taken locally but taken from EL0, EL1,\n or EL2 to EL3 (which would be the normal behavior for FIQs when not executing\n in EL3)."
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/fp_operation.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/fp_operation.json
new file mode 100644 (file)
index 0000000..cec3435
--- /dev/null
@@ -0,0 +1,22 @@
+[
+    {
+        "ArchStdEvent": "FP_HP_SPEC",
+        "PublicDescription": "Counts speculatively executed half precision floating point operations."
+    },
+    {
+        "ArchStdEvent": "FP_SP_SPEC",
+        "PublicDescription": "Counts speculatively executed single precision floating point operations."
+    },
+    {
+        "ArchStdEvent": "FP_DP_SPEC",
+        "PublicDescription": "Counts speculatively executed double precision floating point operations."
+    },
+    {
+        "ArchStdEvent": "FP_SCALE_OPS_SPEC",
+        "PublicDescription": "Counts speculatively executed scalable single precision floating point operations."
+    },
+    {
+        "ArchStdEvent": "FP_FIXED_OPS_SPEC",
+        "PublicDescription": "Counts speculatively executed non-scalable single precision floating point operations."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json
new file mode 100644 (file)
index 0000000..428810f
--- /dev/null
@@ -0,0 +1,10 @@
+[
+    {
+        "ArchStdEvent": "CPU_CYCLES",
+        "PublicDescription": "Counts CPU clock cycles (not timer cycles). The clock measured by this event is defined as the physical clock driving the CPU logic."
+    },
+    {
+        "ArchStdEvent": "CNT_CYCLES",
+        "PublicDescription": "Counts constant frequency cycles."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json
deleted file mode 100644 (file)
index e57cd55..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-[
-    {
-        "ArchStdEvent": "SW_INCR"
-    },
-    {
-        "ArchStdEvent": "INST_RETIRED"
-    },
-    {
-        "ArchStdEvent": "EXC_RETURN"
-    },
-    {
-        "ArchStdEvent": "CID_WRITE_RETIRED"
-    },
-    {
-        "ArchStdEvent": "INST_SPEC"
-    },
-    {
-        "ArchStdEvent": "TTBR_WRITE_RETIRED"
-    },
-    {
-        "ArchStdEvent": "BR_RETIRED"
-    },
-    {
-        "ArchStdEvent": "BR_MIS_PRED_RETIRED"
-    },
-    {
-        "ArchStdEvent": "OP_RETIRED"
-    },
-    {
-        "ArchStdEvent": "OP_SPEC"
-    },
-    {
-        "ArchStdEvent": "LDREX_SPEC"
-    },
-    {
-        "ArchStdEvent": "STREX_PASS_SPEC"
-    },
-    {
-        "ArchStdEvent": "STREX_FAIL_SPEC"
-    },
-    {
-        "ArchStdEvent": "STREX_SPEC"
-    },
-    {
-        "ArchStdEvent": "LD_SPEC"
-    },
-    {
-        "ArchStdEvent": "ST_SPEC"
-    },
-    {
-        "ArchStdEvent": "DP_SPEC"
-    },
-    {
-        "ArchStdEvent": "ASE_SPEC"
-    },
-    {
-        "ArchStdEvent": "VFP_SPEC"
-    },
-    {
-        "ArchStdEvent": "PC_WRITE_SPEC"
-    },
-    {
-        "ArchStdEvent": "CRYPTO_SPEC"
-    },
-    {
-        "ArchStdEvent": "BR_IMMED_SPEC"
-    },
-    {
-        "ArchStdEvent": "BR_RETURN_SPEC"
-    },
-    {
-        "ArchStdEvent": "BR_INDIRECT_SPEC"
-    },
-    {
-        "ArchStdEvent": "ISB_SPEC"
-    },
-    {
-        "ArchStdEvent": "DSB_SPEC"
-    },
-    {
-        "ArchStdEvent": "DMB_SPEC"
-    },
-    {
-        "ArchStdEvent": "RC_LD_SPEC"
-    },
-    {
-        "ArchStdEvent": "RC_ST_SPEC"
-    },
-    {
-        "ArchStdEvent": "ASE_INST_SPEC"
-    },
-    {
-        "ArchStdEvent": "SVE_INST_SPEC"
-    },
-    {
-        "ArchStdEvent": "FP_HP_SPEC"
-    },
-    {
-        "ArchStdEvent": "FP_SP_SPEC"
-    },
-    {
-        "ArchStdEvent": "FP_DP_SPEC"
-    },
-    {
-        "ArchStdEvent": "SVE_PRED_SPEC"
-    },
-    {
-        "ArchStdEvent": "SVE_PRED_EMPTY_SPEC"
-    },
-    {
-        "ArchStdEvent": "SVE_PRED_FULL_SPEC"
-    },
-    {
-        "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC"
-    },
-    {
-        "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC"
-    },
-    {
-        "ArchStdEvent": "SVE_LDFF_SPEC"
-    },
-    {
-        "ArchStdEvent": "SVE_LDFF_FAULT_SPEC"
-    },
-    {
-        "ArchStdEvent": "FP_SCALE_OPS_SPEC"
-    },
-    {
-        "ArchStdEvent": "FP_FIXED_OPS_SPEC"
-    },
-    {
-        "ArchStdEvent": "ASE_SVE_INT8_SPEC"
-    },
-    {
-        "ArchStdEvent": "ASE_SVE_INT16_SPEC"
-    },
-    {
-        "ArchStdEvent": "ASE_SVE_INT32_SPEC"
-    },
-    {
-        "ArchStdEvent": "ASE_SVE_INT64_SPEC"
-    }
-]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json
new file mode 100644 (file)
index 0000000..da7c129
--- /dev/null
@@ -0,0 +1,54 @@
+[
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL",
+        "PublicDescription": "Counts level 1 data cache refills caused by speculatively executed load or store operations that missed in the level 1 data cache. This event only counts one event per cache line. This event does not count cache line allocations from preload instructions or from hardware cache prefetching."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE",
+        "PublicDescription": "Counts level 1 data cache accesses from any load/store operations. Atomic operations that resolve in the CPUs caches (near atomic operations) count as both a write access and read access. Each access to a cache line is counted including the multiple accesses caused by single instructions such as LDM or STM. Each access to other level 1 data or unified memory structures, for example refill buffers, write buffers, and write-back buffers, are also counted."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB",
+        "PublicDescription": "Counts write-backs of dirty data from the L1 data cache to the L2 cache. This occurs when either a dirty cache line is evicted from L1 data cache and allocated in the L2 cache or dirty data is written to the L2 and possibly to the next level of cache. This event counts both victim cache line evictions and cache write-backs from snoops or cache maintenance operations. The following cache operations are not counted:\n\n1. Invalidations which do not result in data being transferred out of the L1 (such as evictions of clean data),\n2. Full line writes which write to L2 without writing L1, such as write streaming mode."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_LMISS_RD",
+        "PublicDescription": "Counts cache line refills into the level 1 data cache from any memory read operations, that incurred additional latency."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_RD",
+        "PublicDescription": "Counts level 1 data cache accesses from any load operation. Atomic load operations that resolve in the CPUs caches count as both a write access and read access."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR",
+        "PublicDescription": "Counts level 1 data cache accesses generated by store operations. This event also counts accesses caused by a DC ZVA (data cache zero, specified by virtual address) instruction. Near atomic operations that resolve in the CPUs caches count as a write access and read access."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+        "PublicDescription": "Counts level 1 data cache refills caused by speculatively executed load instructions where the memory read operation misses in the level 1 data cache. This event only counts one event per cache line."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_WR",
+        "PublicDescription": "Counts level 1 data cache refills caused by speculatively executed store instructions where the memory write operation misses in the level 1 data cache. This event only counts one event per cache line."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_INNER",
+        "PublicDescription": "Counts level 1 data cache refills where the cache line data came from caches inside the immediate cluster of the core."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_OUTER",
+        "PublicDescription": "Counts level 1 data cache refills for which the cache line data came from outside the immediate cluster of the core, like an SLC in the system interconnect or DRAM."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_VICTIM",
+        "PublicDescription": "Counts dirty cache line evictions from the level 1 data cache caused by a new cache line allocation. This event does not count evictions caused by cache maintenance operations."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB_CLEAN",
+        "PublicDescription": "Counts write-backs from the level 1 data cache that are a result of a coherency operation made by another CPU. Event count includes cache maintenance operations."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_INVAL",
+        "PublicDescription": "Counts each explicit invalidation of a cache line in the level 1 data cache caused by:\n\n- Cache Maintenance Operations (CMO) that operate by a virtual address.\n- Broadcast cache coherency operations from another CPU in the system.\n\nThis event does not count for the following conditions:\n\n1. A cache refill invalidates a cache line.\n2. A CMO which is executed on that CPU and invalidates a cache line specified by set/way.\n\nNote that CMOs that operate by set/way cannot be broadcast from one CPU to another."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1i_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1i_cache.json
new file mode 100644 (file)
index 0000000..633f103
--- /dev/null
@@ -0,0 +1,14 @@
+[
+    {
+        "ArchStdEvent": "L1I_CACHE_REFILL",
+        "PublicDescription": "Counts cache line refills in the level 1 instruction cache caused by a missed instruction fetch. Instruction fetches may include accessing multiple instructions, but the single cache line allocation is counted once."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE",
+        "PublicDescription": "Counts instruction fetches which access the level 1 instruction cache. Instruction cache accesses caused by cache maintenance operations are not counted."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_LMISS",
+        "PublicDescription": "Counts cache line refills into the level 1 instruction cache, that incurred additional latency."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json
new file mode 100644 (file)
index 0000000..0e31d0d
--- /dev/null
@@ -0,0 +1,50 @@
+[
+    {
+        "ArchStdEvent": "L2D_CACHE",
+        "PublicDescription": "Counts level 2 cache accesses. level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the first level caches or translation resolutions due to accesses. This event also counts write back of dirty data from level 1 data cache to the L2 cache."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL",
+        "PublicDescription": "Counts cache line refills into the level 2 cache. level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB",
+        "PublicDescription": "Counts write-backs of data from the L2 cache to outside the CPU. This includes snoops to the L2 (from other CPUs) which return data even if the snoops cause an invalidation. L2 cache line invalidations which do not write data outside the CPU and snoops which return data from an L1 cache are not counted. Data would not be written outside the cache when invalidating a clean cache line."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_ALLOCATE",
+        "PublicDescription": "TBD"
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_RD",
+        "PublicDescription": "Counts level 2 cache accesses due to memory read operations. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WR",
+        "PublicDescription": "Counts level 2 cache accesses due to memory write operations. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_RD",
+        "PublicDescription": "Counts refills for memory accesses due to memory read operation counted by L2D_CACHE_RD. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_WR",
+        "PublicDescription": "Counts refills for memory accesses due to memory write operation counted by L2D_CACHE_WR. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
+        "PublicDescription": "Counts evictions from the level 2 cache because of a line being allocated into the L2 cache."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_CLEAN",
+        "PublicDescription": "Counts write-backs from the level 2 cache that are a result of either:\n\n1. Cache maintenance operations,\n\n2. Snoop responses or,\n\n3. Direct cache transfers to another CPU due to a forwarding snoop request."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_INVAL",
+        "PublicDescription": "Counts each explicit invalidation of a cache line in the level 2 cache by cache maintenance operations that operate by a virtual address, or by external coherency operations. This event does not count if either:\n\n1. A cache refill invalidates a cache line or,\n2. A Cache Maintenance Operation (CMO), which invalidates a cache line specified by set/way, is executed on that CPU.\n\nCMOs that operate by set/way cannot be broadcast from one CPU to another."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_LMISS_RD",
+        "PublicDescription": "Counts cache line refills into the level 2 unified cache from any memory read operations that incurred additional latency."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json
new file mode 100644 (file)
index 0000000..45bfba5
--- /dev/null
@@ -0,0 +1,22 @@
+[
+    {
+        "ArchStdEvent": "L3D_CACHE_ALLOCATE",
+        "PublicDescription": "Counts level 3 cache line allocates that do not fetch data from outside the level 3 data or unified cache. For example, allocates due to streaming stores."
+    },
+    {
+        "ArchStdEvent": "L3D_CACHE_REFILL",
+        "PublicDescription": "Counts level 3 accesses that receive data from outside the L3 cache."
+    },
+    {
+        "ArchStdEvent": "L3D_CACHE",
+        "PublicDescription": "Counts level 3 cache accesses. level 3 cache is a unified cache for data and instruction accesses. Accesses are for misses in the lower level caches or translation resolutions due to accesses."
+    },
+    {
+        "ArchStdEvent": "L3D_CACHE_RD",
+        "PublicDescription": "TBD"
+    },
+    {
+        "ArchStdEvent": "L3D_CACHE_LMISS_RD",
+        "PublicDescription": "Counts any cache line refill into the level 3 cache from memory read operations that incurred additional latency."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json
new file mode 100644 (file)
index 0000000..bb712d5
--- /dev/null
@@ -0,0 +1,10 @@
+[
+    {
+        "ArchStdEvent": "LL_CACHE_RD",
+        "PublicDescription": "Counts read transactions that were returned from outside the core cluster. This event counts when the system register CPUECTLR.EXTLLC bit is set. This event counts read transactions returned from outside the core if those transactions are either hit in the system level cache or missed in the SLC and are returned from any other external sources."
+    },
+    {
+        "ArchStdEvent": "LL_CACHE_MISS_RD",
+        "PublicDescription": "Counts read transactions that were returned from outside the core cluster but missed in the system level cache. This event counts when the system register CPUECTLR.EXTLLC bit is set. This event counts read transactions returned from outside the core if those transactions are missed in the System level Cache. The data source of the transaction is indicated by a field in the CHI transaction returning to the CPU. This event does not count reads caused by cache maintenance operations."
+    }
+]
index 7b2b21a..106a97f 100644 (file)
@@ -1,41 +1,46 @@
 [
     {
-        "ArchStdEvent": "MEM_ACCESS"
+        "ArchStdEvent": "MEM_ACCESS",
+        "PublicDescription": "Counts memory accesses issued by the CPU load store unit, where those accesses are issued due to load or store operations. This event counts memory accesses no matter whether the data is received from any level of cache hierarchy or external memory. If memory accesses are broken up into smaller transactions than what were specified in the load or store instructions, then the event counts those smaller memory transactions."
     },
     {
-        "ArchStdEvent": "REMOTE_ACCESS"
+        "ArchStdEvent": "MEMORY_ERROR",
+        "PublicDescription": "Counts any detected correctable or uncorrectable physical memory errors (ECC or parity) in protected CPUs RAMs. On the core, this event counts errors in the caches (including data and tag rams). Any detected memory error (from either a speculative and abandoned access, or an architecturally executed access) is counted. Note that errors are only detected when the actual protected memory is accessed by an operation."
     },
     {
-        "ArchStdEvent": "MEM_ACCESS_RD"
+        "ArchStdEvent": "REMOTE_ACCESS",
+        "PublicDescription": "Counts accesses to another chip, which is implemented as a different CMN mesh in the system. If the CHI bus response back to the core indicates that the data source is from another chip (mesh), then the counter is updated. If no data is returned, even if the system snoops another chip/mesh, then the counter is not updated."
     },
     {
-        "ArchStdEvent": "MEM_ACCESS_WR"
+        "ArchStdEvent": "MEM_ACCESS_RD",
+        "PublicDescription": "Counts memory accesses issued by the CPU due to load operations. The event counts any memory load access, no matter whether the data is received from any level of cache hierarchy or external memory. The event also counts atomic load operations. If memory accesses are broken up by the load/store unit into smaller transactions that are issued by the bus interface, then the event counts those smaller transactions."
     },
     {
-        "ArchStdEvent": "UNALIGNED_LD_SPEC"
+        "ArchStdEvent": "MEM_ACCESS_WR",
+        "PublicDescription": "Counts memory accesses issued by the CPU due to store operations. The event counts any memory store access, no matter whether the data is located in any level of cache or external memory. The event also counts atomic load and store operations. If memory accesses are broken up by the load/store unit into smaller transactions that are issued by the bus interface, then the event counts those smaller transactions."
     },
     {
-        "ArchStdEvent": "UNALIGNED_ST_SPEC"
+        "ArchStdEvent": "LDST_ALIGN_LAT",
+        "PublicDescription": "Counts the number of memory read and write accesses in a cycle that incurred additional latency, due to the alignment of the address and the size of data being accessed, which results in store crossing a single cache line."
     },
     {
-        "ArchStdEvent": "UNALIGNED_LDST_SPEC"
+        "ArchStdEvent": "LD_ALIGN_LAT",
+        "PublicDescription": "Counts the number of memory read accesses in a cycle that incurred additional latency, due to the alignment of the address and size of data being accessed, which results in load crossing a single cache line."
     },
     {
-        "ArchStdEvent": "LDST_ALIGN_LAT"
+        "ArchStdEvent": "ST_ALIGN_LAT",
+        "PublicDescription": "Counts the number of memory write accesses in a cycle that incurred additional latency, due to the alignment of the address and size of data being accessed."
     },
     {
-        "ArchStdEvent": "LD_ALIGN_LAT"
+        "ArchStdEvent": "MEM_ACCESS_CHECKED",
+        "PublicDescription": "Counts the number of memory read and write accesses in a cycle that are tag checked by the Memory Tagging Extension (MTE)."
     },
     {
-        "ArchStdEvent": "ST_ALIGN_LAT"
+        "ArchStdEvent": "MEM_ACCESS_CHECKED_RD",
+        "PublicDescription": "Counts the number of memory read accesses in a cycle that are tag checked by the Memory Tagging Extension (MTE)."
     },
     {
-        "ArchStdEvent": "MEM_ACCESS_CHECKED"
-    },
-    {
-        "ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
-    },
-    {
-        "ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
+        "ArchStdEvent": "MEM_ACCESS_CHECKED_WR",
+        "PublicDescription": "Counts the number of memory write accesses in a cycle that are tag checked by the Memory Tagging Extension (MTE)."
     }
 ]
index 8ad15b7..5f44927 100644 (file)
 [
     {
-        "ArchStdEvent": "FRONTEND_BOUND",
-        "MetricExpr": "((stall_slot_frontend) if (#slots - 5) else (stall_slot_frontend - cpu_cycles)) / (#slots * cpu_cycles)"
+        "ArchStdEvent": "backend_bound",
+        "MetricExpr": "(100 * ((STALL_SLOT_BACKEND / (CPU_CYCLES * #slots)) - ((BR_MIS_PRED * 3) / CPU_CYCLES)))"
     },
     {
-        "ArchStdEvent": "BAD_SPECULATION",
-        "MetricExpr": "(1 - op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))"
+        "MetricName": "backend_stalled_cycles",
+        "MetricExpr": "((STALL_BACKEND / CPU_CYCLES) * 100)",
+        "BriefDescription": "This metric is the percentage of cycles that were stalled due to resource constraints in the backend unit of the processor.",
+        "MetricGroup": "Cycle_Accounting",
+        "ScaleUnit": "1percent of cycles"
     },
     {
-        "ArchStdEvent": "RETIRING",
-        "MetricExpr": "(op_retired / op_spec) * (1 - (stall_slot if (#slots - 5) else (stall_slot - cpu_cycles)) / (#slots * cpu_cycles))"
+        "ArchStdEvent": "bad_speculation",
+        "MetricExpr": "(100 * (((1 - (OP_RETIRED / OP_SPEC)) * (1 - (((STALL_SLOT) if (strcmp_cpuid_str(0x410fd493) | strcmp_cpuid_str(0x410fd490) ^ 1) else (STALL_SLOT - CPU_CYCLES)) / (CPU_CYCLES * #slots)))) + ((BR_MIS_PRED * 4) / CPU_CYCLES)))"
     },
     {
-        "ArchStdEvent": "BACKEND_BOUND"
+        "MetricName": "branch_misprediction_ratio",
+        "MetricExpr": "(BR_MIS_PRED_RETIRED / BR_RETIRED)",
+        "BriefDescription": "This metric measures the ratio of branches mispredicted to the total number of branches architecturally executed. This gives an indication of the effectiveness of the branch prediction unit.",
+        "MetricGroup": "Miss_Ratio;Branch_Effectiveness",
+        "ScaleUnit": "1per branch"
     },
     {
-        "MetricExpr": "L1D_TLB_REFILL / L1D_TLB",
-        "BriefDescription": "The rate of L1D TLB refill to the overall L1D TLB lookups",
-        "MetricGroup": "TLB",
-        "MetricName": "l1d_tlb_miss_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "branch_mpki",
+        "MetricExpr": "((BR_MIS_PRED_RETIRED / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of branch mispredictions per thousand instructions executed.",
+        "MetricGroup": "MPKI;Branch_Effectiveness",
+        "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "L1I_TLB_REFILL / L1I_TLB",
-        "BriefDescription": "The rate of L1I TLB refill to the overall L1I TLB lookups",
-        "MetricGroup": "TLB",
-        "MetricName": "l1i_tlb_miss_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "branch_percentage",
+        "MetricExpr": "(((BR_IMMED_SPEC + BR_INDIRECT_SPEC) / INST_SPEC) * 100)",
+        "BriefDescription": "This metric measures branch operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "1percent of operations"
     },
     {
-        "MetricExpr": "L2D_TLB_REFILL / L2D_TLB",
-        "BriefDescription": "The rate of L2D TLB refill to the overall L2D TLB lookups",
-        "MetricGroup": "TLB",
-        "MetricName": "l2_tlb_miss_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "crypto_percentage",
+        "MetricExpr": "((CRYPTO_SPEC / INST_SPEC) * 100)",
+        "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "1percent of operations"
     },
     {
-        "MetricExpr": "DTLB_WALK / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of TLB Walks per kilo instructions for data accesses",
-        "MetricGroup": "TLB",
         "MetricName": "dtlb_mpki",
+        "MetricExpr": "((DTLB_WALK / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of data TLB Walks per thousand instructions executed.",
+        "MetricGroup": "MPKI;DTLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "DTLB_WALK / L1D_TLB",
-        "BriefDescription": "The rate of DTLB Walks to the overall L1D TLB lookups",
-        "MetricGroup": "TLB",
-        "MetricName": "dtlb_walk_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "dtlb_walk_ratio",
+        "MetricExpr": "(DTLB_WALK / L1D_TLB)",
+        "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.",
+        "MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
+        "ScaleUnit": "1per TLB access"
     },
     {
-        "MetricExpr": "ITLB_WALK / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of TLB Walks per kilo instructions for instruction accesses",
-        "MetricGroup": "TLB",
-        "MetricName": "itlb_mpki",
-        "ScaleUnit": "1MPKI"
+        "ArchStdEvent": "frontend_bound",
+        "MetricExpr": "(100 * ((((STALL_SLOT_FRONTEND) if (strcmp_cpuid_str(0x410fd493) | strcmp_cpuid_str(0x410fd490) ^ 1) else (STALL_SLOT_FRONTEND - CPU_CYCLES)) / (CPU_CYCLES * #slots)) - (BR_MIS_PRED / CPU_CYCLES)))"
     },
     {
-        "MetricExpr": "ITLB_WALK / L1I_TLB",
-        "BriefDescription": "The rate of ITLB Walks to the overall L1I TLB lookups",
-        "MetricGroup": "TLB",
-        "MetricName": "itlb_walk_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "frontend_stalled_cycles",
+        "MetricExpr": "((STALL_FRONTEND / CPU_CYCLES) * 100)",
+        "BriefDescription": "This metric is the percentage of cycles that were stalled due to resource constraints in the frontend unit of the processor.",
+        "MetricGroup": "Cycle_Accounting",
+        "ScaleUnit": "1percent of cycles"
     },
     {
-        "MetricExpr": "L1I_CACHE_REFILL / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of L1 I-Cache misses per kilo instructions",
-        "MetricGroup": "Cache",
-        "MetricName": "l1i_cache_mpki",
+        "MetricName": "integer_dp_percentage",
+        "MetricExpr": "((DP_SPEC / INST_SPEC) * 100)",
+        "BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "1percent of operations"
+    },
+    {
+        "MetricName": "ipc",
+        "MetricExpr": "(INST_RETIRED / CPU_CYCLES)",
+        "BriefDescription": "This metric measures the number of instructions retired per cycle.",
+        "MetricGroup": "General",
+        "ScaleUnit": "1per cycle"
+    },
+    {
+        "MetricName": "itlb_mpki",
+        "MetricExpr": "((ITLB_WALK / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of instruction TLB Walks per thousand instructions executed.",
+        "MetricGroup": "MPKI;ITLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE",
-        "BriefDescription": "The rate of L1 I-Cache misses to the overall L1 I-Cache",
-        "MetricGroup": "Cache",
-        "MetricName": "l1i_cache_miss_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "itlb_walk_ratio",
+        "MetricExpr": "(ITLB_WALK / L1I_TLB)",
+        "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.",
+        "MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
+        "ScaleUnit": "1per TLB access"
+    },
+    {
+        "MetricName": "l1d_cache_miss_ratio",
+        "MetricExpr": "(L1D_CACHE_REFILL / L1D_CACHE)",
+        "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.",
+        "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness",
+        "ScaleUnit": "1per cache access"
     },
     {
-        "MetricExpr": "L1D_CACHE_REFILL / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of L1 D-Cache misses per kilo instructions",
-        "MetricGroup": "Cache",
         "MetricName": "l1d_cache_mpki",
+        "MetricExpr": "((L1D_CACHE_REFILL / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of level 1 data cache accesses missed per thousand instructions executed.",
+        "MetricGroup": "MPKI;L1D_Cache_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE",
-        "BriefDescription": "The rate of L1 D-Cache misses to the overall L1 D-Cache",
-        "MetricGroup": "Cache",
-        "MetricName": "l1d_cache_miss_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "l1d_tlb_miss_ratio",
+        "MetricExpr": "(L1D_TLB_REFILL / L1D_TLB)",
+        "BriefDescription": "This metric measures the ratio of level 1 data TLB accesses missed to the total number of level 1 data TLB accesses. This gives an indication of the effectiveness of the level 1 data TLB.",
+        "MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
+        "ScaleUnit": "1per TLB access"
     },
     {
-        "MetricExpr": "L2D_CACHE_REFILL / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of L2 D-Cache misses per kilo instructions",
-        "MetricGroup": "Cache",
-        "MetricName": "l2d_cache_mpki",
+        "MetricName": "l1d_tlb_mpki",
+        "MetricExpr": "((L1D_TLB_REFILL / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of level 1 data TLB accesses missed per thousand instructions executed.",
+        "MetricGroup": "MPKI;DTLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE",
-        "BriefDescription": "The rate of L2 D-Cache misses to the overall L2 D-Cache",
-        "MetricGroup": "Cache",
-        "MetricName": "l2d_cache_miss_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "l1i_cache_miss_ratio",
+        "MetricExpr": "(L1I_CACHE_REFILL / L1I_CACHE)",
+        "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.",
+        "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness",
+        "ScaleUnit": "1per cache access"
     },
     {
-        "MetricExpr": "L3D_CACHE_REFILL / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of L3 D-Cache misses per kilo instructions",
-        "MetricGroup": "Cache",
-        "MetricName": "l3d_cache_mpki",
+        "MetricName": "l1i_cache_mpki",
+        "MetricExpr": "((L1I_CACHE_REFILL / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of level 1 instruction cache accesses missed per thousand instructions executed.",
+        "MetricGroup": "MPKI;L1I_Cache_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "L3D_CACHE_REFILL / L3D_CACHE",
-        "BriefDescription": "The rate of L3 D-Cache misses to the overall L3 D-Cache",
-        "MetricGroup": "Cache",
-        "MetricName": "l3d_cache_miss_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "l1i_tlb_miss_ratio",
+        "MetricExpr": "(L1I_TLB_REFILL / L1I_TLB)",
+        "BriefDescription": "This metric measures the ratio of level 1 instruction TLB accesses missed to the total number of level 1 instruction TLB accesses. This gives an indication of the effectiveness of the level 1 instruction TLB.",
+        "MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
+        "ScaleUnit": "1per TLB access"
     },
     {
-        "MetricExpr": "LL_CACHE_MISS_RD / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of LL Cache read misses per kilo instructions",
-        "MetricGroup": "Cache",
-        "MetricName": "ll_cache_read_mpki",
+        "MetricName": "l1i_tlb_mpki",
+        "MetricExpr": "((L1I_TLB_REFILL / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of level 1 instruction TLB accesses missed per thousand instructions executed.",
+        "MetricGroup": "MPKI;ITLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "LL_CACHE_MISS_RD / LL_CACHE_RD",
-        "BriefDescription": "The rate of LL Cache read misses to the overall LL Cache read",
-        "MetricGroup": "Cache",
-        "MetricName": "ll_cache_read_miss_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "l2_cache_miss_ratio",
+        "MetricExpr": "(L2D_CACHE_REFILL / L2D_CACHE)",
+        "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
+        "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness",
+        "ScaleUnit": "1per cache access"
     },
     {
-        "MetricExpr": "(LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD",
-        "BriefDescription": "The rate of LL Cache read hit to the overall LL Cache read",
-        "MetricGroup": "Cache",
-        "MetricName": "ll_cache_read_hit_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "l2_cache_mpki",
+        "MetricExpr": "((L2D_CACHE_REFILL / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of level 2 unified cache accesses missed per thousand instructions executed. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
+        "MetricGroup": "MPKI;L2_Cache_Effectiveness",
+        "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "BR_MIS_PRED_RETIRED / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of branches mis-predicted per kilo instructions",
-        "MetricGroup": "Branch",
-        "MetricName": "branch_mpki",
+        "MetricName": "l2_tlb_miss_ratio",
+        "MetricExpr": "(L2D_TLB_REFILL / L2D_TLB)",
+        "BriefDescription": "This metric measures the ratio of level 2 unified TLB accesses missed to the total number of level 2 unified TLB accesses. This gives an indication of the effectiveness of the level 2 TLB.",
+        "MetricGroup": "Miss_Ratio;ITLB_Effectiveness;DTLB_Effectiveness",
+        "ScaleUnit": "1per TLB access"
+    },
+    {
+        "MetricName": "l2_tlb_mpki",
+        "MetricExpr": "((L2D_TLB_REFILL / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of level 2 unified TLB accesses missed per thousand instructions executed.",
+        "MetricGroup": "MPKI;ITLB_Effectiveness;DTLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "BR_RETIRED / INST_RETIRED * 1000",
-        "BriefDescription": "The rate of branches retired per kilo instructions",
-        "MetricGroup": "Branch",
-        "MetricName": "branch_pki",
-        "ScaleUnit": "1PKI"
+        "MetricName": "ll_cache_read_hit_ratio",
+        "MetricExpr": "((LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD)",
+        "BriefDescription": "This metric measures the ratio of last level cache read accesses hit in the cache to the total number of last level cache read accesses. This gives an indication of the effectiveness of the last level cache for read traffic. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a system level cache.",
+        "MetricGroup": "LL_Cache_Effectiveness",
+        "ScaleUnit": "1per cache access"
     },
     {
-        "MetricExpr": "BR_MIS_PRED_RETIRED / BR_RETIRED",
-        "BriefDescription": "The rate of branches mis-predited to the overall branches",
-        "MetricGroup": "Branch",
-        "MetricName": "branch_miss_pred_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "ll_cache_read_miss_ratio",
+        "MetricExpr": "(LL_CACHE_MISS_RD / LL_CACHE_RD)",
+        "BriefDescription": "This metric measures the ratio of last level cache read accesses missed to the total number of last level cache read accesses. This gives an indication of the effectiveness of the last level cache for read traffic. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a system level cache.",
+        "MetricGroup": "Miss_Ratio;LL_Cache_Effectiveness",
+        "ScaleUnit": "1per cache access"
     },
     {
-        "MetricExpr": "instructions / CPU_CYCLES",
-        "BriefDescription": "The average number of instructions executed for each cycle.",
-        "MetricGroup": "PEutilization",
-        "MetricName": "ipc"
+        "MetricName": "ll_cache_read_mpki",
+        "MetricExpr": "((LL_CACHE_MISS_RD / INST_RETIRED) * 1000)",
+        "BriefDescription": "This metric measures the number of last level cache read accesses missed per thousand instructions executed.",
+        "MetricGroup": "MPKI;LL_Cache_Effectiveness",
+        "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "ipc / 5",
-        "BriefDescription": "IPC percentage of peak. The peak of IPC is 5.",
-        "MetricGroup": "PEutilization",
-        "MetricName": "ipc_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "load_percentage",
+        "MetricExpr": "((LD_SPEC / INST_SPEC) * 100)",
+        "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "1percent of operations"
     },
     {
-        "MetricExpr": "INST_RETIRED / CPU_CYCLES",
-        "BriefDescription": "Architecturally executed Instructions Per Cycle (IPC)",
-        "MetricGroup": "PEutilization",
-        "MetricName": "retired_ipc"
+        "ArchStdEvent": "retiring",
+        "MetricExpr": "(100 * ((OP_RETIRED / OP_SPEC) * (1 - (((STALL_SLOT) if (strcmp_cpuid_str(0x410fd493) | strcmp_cpuid_str(0x410fd490) ^ 1) else (STALL_SLOT - CPU_CYCLES)) / (CPU_CYCLES * #slots)))))"
     },
     {
-        "MetricExpr": "INST_SPEC / CPU_CYCLES",
-        "BriefDescription": "Speculatively executed Instructions Per Cycle (IPC)",
-        "MetricGroup": "PEutilization",
-        "MetricName": "spec_ipc"
+        "MetricName": "scalar_fp_percentage",
+        "MetricExpr": "((VFP_SPEC / INST_SPEC) * 100)",
+        "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "1percent of operations"
     },
     {
-        "MetricExpr": "OP_RETIRED / OP_SPEC",
-        "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)",
-        "MetricGroup": "PEutilization",
-        "MetricName": "retired_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "simd_percentage",
+        "MetricExpr": "((ASE_SPEC / INST_SPEC) * 100)",
+        "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "1percent of operations"
     },
     {
-        "MetricExpr": "1 - OP_RETIRED / OP_SPEC",
-        "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)",
-        "MetricGroup": "PEutilization",
-        "MetricName": "wasted_rate",
-        "ScaleUnit": "100%"
+        "MetricName": "store_percentage",
+        "MetricExpr": "((ST_SPEC / INST_SPEC) * 100)",
+        "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "1percent of operations"
     },
     {
-        "MetricExpr": "OP_RETIRED / OP_SPEC * (1 - (STALL_SLOT if (#slots - 5) else (STALL_SLOT - CPU_CYCLES)) / (#slots * CPU_CYCLES))",
-        "BriefDescription": "The truly effective ratio of micro-operations executed by the CPU, which means that misprediction and stall are not included",
-        "MetricGroup": "PEutilization",
-        "MetricName": "cpu_utilization",
-        "ScaleUnit": "100%"
+        "MetricExpr": "L3D_CACHE_REFILL / INST_RETIRED * 1000",
+        "BriefDescription": "The rate of L3 D-Cache misses per kilo instructions",
+        "MetricGroup": "MPKI;L3_Cache_Effectiveness",
+        "MetricName": "l3d_cache_mpki",
+        "ScaleUnit": "1MPKI"
     },
     {
-        "MetricExpr": "LD_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of load instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
-        "MetricName": "load_spec_rate",
+        "MetricExpr": "L3D_CACHE_REFILL / L3D_CACHE",
+        "BriefDescription": "The rate of L3 D-Cache misses to the overall L3 D-Cache",
+        "MetricGroup": "Miss_Ratio;L3_Cache_Effectiveness",
+        "MetricName": "l3d_cache_miss_rate",
         "ScaleUnit": "100%"
     },
     {
-        "MetricExpr": "ST_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of store instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
-        "MetricName": "store_spec_rate",
-        "ScaleUnit": "100%"
+        "MetricExpr": "BR_RETIRED / INST_RETIRED * 1000",
+        "BriefDescription": "The rate of branches retired per kilo instructions",
+        "MetricGroup": "MPKI;Branch_Effectiveness",
+        "MetricName": "branch_pki",
+        "ScaleUnit": "1PKI"
     },
     {
-        "MetricExpr": "DP_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of integer data-processing instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
-        "MetricName": "data_process_spec_rate",
+        "MetricExpr": "ipc / #slots",
+        "BriefDescription": "IPC percentage of peak. The peak of IPC is the number of slots.",
+        "MetricGroup": "General",
+        "MetricName": "ipc_rate",
         "ScaleUnit": "100%"
     },
     {
-        "MetricExpr": "ASE_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of advanced SIMD instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
-        "MetricName": "advanced_simd_spec_rate",
-        "ScaleUnit": "100%"
+        "MetricExpr": "INST_SPEC / CPU_CYCLES",
+        "BriefDescription": "Speculatively executed Instructions Per Cycle (IPC)",
+        "MetricGroup": "General",
+        "MetricName": "spec_ipc"
     },
     {
-        "MetricExpr": "VFP_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of floating point instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
-        "MetricName": "float_point_spec_rate",
+        "MetricExpr": "OP_RETIRED / OP_SPEC",
+        "BriefDescription": "Of all the micro-operations issued, what percentage are retired(committed)",
+        "MetricGroup": "General",
+        "MetricName": "retired_rate",
         "ScaleUnit": "100%"
     },
     {
-        "MetricExpr": "CRYPTO_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of crypto instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
-        "MetricName": "crypto_spec_rate",
+        "MetricExpr": "1 - OP_RETIRED / OP_SPEC",
+        "BriefDescription": "Of all the micro-operations issued, what percentage are not retired(committed)",
+        "MetricGroup": "General",
+        "MetricName": "wasted_rate",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "BR_IMMED_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of branch immediate instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
+        "BriefDescription": "The rate of branch immediate instructions speculatively executed to overall instructions speculatively executed",
+        "MetricGroup": "Operation_Mix",
         "MetricName": "branch_immed_spec_rate",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "BR_RETURN_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of procedure return instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
+        "BriefDescription": "The rate of procedure return instructions speculatively executed to overall instructions speculatively executed",
+        "MetricGroup": "Operation_Mix",
         "MetricName": "branch_return_spec_rate",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "BR_INDIRECT_SPEC / INST_SPEC",
-        "BriefDescription": "The rate of indirect branch instructions speculatively executed to overall instructions speclatively executed",
-        "MetricGroup": "InstructionMix",
+        "BriefDescription": "The rate of indirect branch instructions speculatively executed to overall instructions speculatively executed",
+        "MetricGroup": "Operation_Mix",
         "MetricName": "branch_indirect_spec_rate",
         "ScaleUnit": "100%"
     }
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json
deleted file mode 100644 (file)
index f9fae15..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-[
-    {
-        "ArchStdEvent": "STALL_FRONTEND"
-    },
-    {
-        "ArchStdEvent": "STALL_BACKEND"
-    },
-    {
-        "ArchStdEvent": "STALL"
-    },
-    {
-        "ArchStdEvent": "STALL_SLOT_BACKEND"
-    },
-    {
-        "ArchStdEvent": "STALL_SLOT_FRONTEND"
-    },
-    {
-        "ArchStdEvent": "STALL_SLOT"
-    },
-    {
-        "ArchStdEvent": "STALL_BACKEND_MEM"
-    }
-]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json
new file mode 100644 (file)
index 0000000..f297b04
--- /dev/null
@@ -0,0 +1,30 @@
+[
+    {
+        "ArchStdEvent": "SW_INCR",
+        "PublicDescription": "Counts software writes to the PMSWINC_EL0 (software PMU increment) register. The PMSWINC_EL0 register is a manually updated counter for use by application software.\n\nThis event could be used to measure any user program event, such as accesses to a particular data structure (by writing to the PMSWINC_EL0 register each time the data structure is accessed).\n\nTo use the PMSWINC_EL0 register and event, developers must insert instructions that write to the PMSWINC_EL0 register into the source code.\n\nSince the SW_INCR event records writes to the PMSWINC_EL0 register, there is no need to do a read/increment/write sequence to the PMSWINC_EL0 register."
+    },
+    {
+        "ArchStdEvent": "INST_RETIRED",
+        "PublicDescription": "Counts instructions that have been architecturally executed."
+    },
+    {
+        "ArchStdEvent": "CID_WRITE_RETIRED",
+        "PublicDescription": "Counts architecturally executed writes to the CONTEXTIDR register, which usually contains the kernel PID and can be output with hardware trace."
+    },
+    {
+        "ArchStdEvent": "TTBR_WRITE_RETIRED",
+        "PublicDescription": "Counts architectural writes to TTBR0/1_EL1. If virtualization host extensions are enabled (by setting the HCR_EL2.E2H bit to 1), then accesses to TTBR0/1_EL1 that are redirected to TTBR0/1_EL2, or accesses to TTBR0/1_EL12, are counted. TTBRn registers are typically updated when the kernel is swapping user-space threads or applications."
+    },
+    {
+        "ArchStdEvent": "BR_RETIRED",
+        "PublicDescription": "Counts architecturally executed branches, whether the branch is taken or not. Instructions that explicitly write to the PC are also counted."
+    },
+    {
+        "ArchStdEvent": "BR_MIS_PRED_RETIRED",
+        "PublicDescription": "Counts branches counted by BR_RETIRED which were mispredicted and caused a pipeline flush."
+    },
+    {
+        "ArchStdEvent": "OP_RETIRED",
+        "PublicDescription": "Counts micro-operations that are architecturally executed. This is a count of number of micro-operations retired from the commit queue in a single cycle."
+    }
+]
index 20f2165..5de8b0f 100644 (file)
@@ -1,14 +1,18 @@
 [
     {
-        "ArchStdEvent": "SAMPLE_POP"
+        "ArchStdEvent": "SAMPLE_POP",
+        "PublicDescription": "Counts statistical profiling sample population, the count of all operations that could be sampled but may or may not be chosen for sampling."
     },
     {
-        "ArchStdEvent": "SAMPLE_FEED"
+        "ArchStdEvent": "SAMPLE_FEED",
+        "PublicDescription": "Counts statistical profiling samples taken for sampling."
     },
     {
-        "ArchStdEvent": "SAMPLE_FILTRATE"
+        "ArchStdEvent": "SAMPLE_FILTRATE",
+        "PublicDescription": "Counts statistical profiling samples taken which are not removed by filtering."
     },
     {
-        "ArchStdEvent": "SAMPLE_COLLISION"
+        "ArchStdEvent": "SAMPLE_COLLISION",
+        "PublicDescription": "Counts statistical profiling samples that have collided with a previous sample and were therefore not taken."
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json
new file mode 100644 (file)
index 0000000..1af961f
--- /dev/null
@@ -0,0 +1,110 @@
+[
+    {
+        "ArchStdEvent": "BR_MIS_PRED",
+        "PublicDescription": "Counts branches which are speculatively executed and mispredicted."
+    },
+    {
+        "ArchStdEvent": "BR_PRED",
+        "PublicDescription": "Counts branches which are speculatively executed and were predicted correctly."
+    },
+    {
+        "ArchStdEvent": "INST_SPEC",
+        "PublicDescription": "Counts operations that have been speculatively executed."
+    },
+    {
+        "ArchStdEvent": "OP_SPEC",
+        "PublicDescription": "Counts micro-operations speculatively executed. This is the count of the number of micro-operations dispatched in a cycle."
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LD_SPEC",
+        "PublicDescription": "Counts unaligned memory read operations issued by the CPU. This event counts unaligned accesses (as defined by the actual instruction), even if they are subsequently issued as multiple aligned accesses. The event does not count preload operations (PLD, PLI)."
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_ST_SPEC",
+        "PublicDescription": "Counts unaligned memory write operations issued by the CPU. This event counts unaligned accesses (as defined by the actual instruction), even if they are subsequently issued as multiple aligned accesses."
+    },
+    {
+        "ArchStdEvent": "UNALIGNED_LDST_SPEC",
+        "PublicDescription": "Counts unaligned memory operations issued by the CPU. This event counts unaligned accesses (as defined by the actual instruction), even if they are subsequently issued as multiple aligned accesses."
+    },
+    {
+        "ArchStdEvent": "LDREX_SPEC",
+        "PublicDescription": "Counts Load-Exclusive operations that have been speculatively executed. Eg: LDREX, LDX"
+    },
+    {
+        "ArchStdEvent": "STREX_PASS_SPEC",
+        "PublicDescription": "Counts store-exclusive operations that have been speculatively executed and have successfully completed the store operation."
+    },
+    {
+        "ArchStdEvent": "STREX_FAIL_SPEC",
+        "PublicDescription": "Counts store-exclusive operations that have been speculatively executed and have not successfully completed the store operation."
+    },
+    {
+        "ArchStdEvent": "STREX_SPEC",
+        "PublicDescription": "Counts store-exclusive operations that have been speculatively executed."
+    },
+    {
+        "ArchStdEvent": "LD_SPEC",
+        "PublicDescription": "Counts speculatively executed load operations including Single Instruction Multiple Data (SIMD) load operations."
+    },
+    {
+        "ArchStdEvent": "ST_SPEC",
+        "PublicDescription": "Counts speculatively executed store operations including Single Instruction Multiple Data (SIMD) store operations."
+    },
+    {
+        "ArchStdEvent": "DP_SPEC",
+        "PublicDescription": "Counts speculatively executed logical or arithmetic instructions such as MOV/MVN operations."
+    },
+    {
+        "ArchStdEvent": "ASE_SPEC",
+        "PublicDescription": "Counts speculatively executed Advanced SIMD operations excluding load, store and move micro-operations that move data to or from SIMD (vector) registers."
+    },
+    {
+        "ArchStdEvent": "VFP_SPEC",
+        "PublicDescription": "Counts speculatively executed floating point operations. This event does not count operations that move data to or from floating point (vector) registers."
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_SPEC",
+        "PublicDescription": "Counts speculatively executed operations which cause software changes of the PC. Those operations include all taken branch operations."
+    },
+    {
+        "ArchStdEvent": "CRYPTO_SPEC",
+        "PublicDescription": "Counts speculatively executed cryptographic operations except for PMULL and VMULL operations."
+    },
+    {
+        "ArchStdEvent": "BR_IMMED_SPEC",
+        "PublicDescription": "Counts immediate branch operations which are speculatively executed."
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_SPEC",
+        "PublicDescription": "Counts procedure return operations (RET) which are speculatively executed."
+    },
+    {
+        "ArchStdEvent": "BR_INDIRECT_SPEC",
+        "PublicDescription": "Counts indirect branch operations including procedure returns, which are speculatively executed. This includes operations that force a software change of the PC, other than exception-generating operations.  Eg: BR Xn, RET"
+    },
+    {
+        "ArchStdEvent": "ISB_SPEC",
+        "PublicDescription": "Counts ISB operations that are executed."
+    },
+    {
+        "ArchStdEvent": "DSB_SPEC",
+        "PublicDescription": "Counts DSB operations that are speculatively issued to the Load/Store unit in the CPU."
+    },
+    {
+        "ArchStdEvent": "DMB_SPEC",
+        "PublicDescription": "Counts DMB operations that are speculatively issued to the Load/Store unit in the CPU. This event does not count implied barriers from load acquire/store release operations."
+    },
+    {
+        "ArchStdEvent": "RC_LD_SPEC",
+        "PublicDescription": "Counts any load acquire operations that are speculatively executed. Eg: LDAR, LDARH, LDARB"
+    },
+    {
+        "ArchStdEvent": "RC_ST_SPEC",
+        "PublicDescription": "Counts any store release operations that are speculatively executed. Eg: STLR, STLRH, STLRB"
+    },
+    {
+        "ArchStdEvent": "ASE_INST_SPEC",
+        "PublicDescription": "Counts speculatively executed Advanced SIMD operations."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json
new file mode 100644 (file)
index 0000000..bbbebc8
--- /dev/null
@@ -0,0 +1,30 @@
+[
+    {
+        "ArchStdEvent": "STALL_FRONTEND",
+        "PublicDescription": "Counts cycles when frontend could not send any micro-operations to the rename stage because of frontend resource stalls caused by fetch memory latency or branch prediction flow stalls. All the frontend slots were empty during the cycle when this event counts."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND",
+        "PublicDescription": "Counts cycles whenever the rename unit is unable to send any micro-operations to the backend of the pipeline because of backend resource constraints. Backend resource constraints can include issue stage fullness, execution stage fullness, or other internal pipeline resource fullness. All the backend slots were empty during the cycle when this event counts."
+    },
+    {
+        "ArchStdEvent": "STALL",
+        "PublicDescription": "Counts cycles when no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)."
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT_BACKEND",
+        "PublicDescription": "Counts slots per cycle in which no operations are sent from the rename unit to the backend due to backend resource constraints."
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT_FRONTEND",
+        "PublicDescription": "Counts slots per cycle in which no operations are sent to the rename unit from the frontend due to frontend resource constraints."
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT",
+        "PublicDescription": "Counts slots per cycle in which no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_MEM",
+        "PublicDescription": "Counts cycles when the backend is stalled because there is a pending demand load request in progress in the last level core cache."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/sve.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/sve.json
new file mode 100644 (file)
index 0000000..51dab48
--- /dev/null
@@ -0,0 +1,50 @@
+[
+    {
+        "ArchStdEvent": "SVE_INST_SPEC",
+        "PublicDescription": "Counts speculatively executed operations that are SVE operations."
+    },
+    {
+        "ArchStdEvent": "SVE_PRED_SPEC",
+        "PublicDescription": "Counts speculatively executed predicated SVE operations."
+    },
+    {
+        "ArchStdEvent": "SVE_PRED_EMPTY_SPEC",
+        "PublicDescription": "Counts speculatively executed predicated SVE operations with no active predicate elements."
+    },
+    {
+        "ArchStdEvent": "SVE_PRED_FULL_SPEC",
+        "PublicDescription": "Counts speculatively executed predicated SVE operations with all predicate elements active."
+    },
+    {
+        "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC",
+        "PublicDescription": "Counts speculatively executed predicated SVE operations with at least one but not all active predicate elements."
+    },
+    {
+        "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC",
+        "PublicDescription": "Counts speculatively executed predicated SVE operations with at least one non-active predicate element."
+    },
+    {
+        "ArchStdEvent": "SVE_LDFF_SPEC",
+        "PublicDescription": "Counts speculatively executed SVE first fault or non-fault load operations."
+    },
+    {
+        "ArchStdEvent": "SVE_LDFF_FAULT_SPEC",
+        "PublicDescription": "Counts speculatively executed SVE first fault or non-fault load operations that clear at least one bit in the FFR."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT8_SPEC",
+        "PublicDescription": "Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type an 8-bit integer."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT16_SPEC",
+        "PublicDescription": "Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 16-bit integer."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT32_SPEC",
+        "PublicDescription": "Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 32-bit integer."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT64_SPEC",
+        "PublicDescription": "Counts speculatively executed Advanced SIMD or SVE integer operations with the largest data type a 64-bit integer."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json
new file mode 100644 (file)
index 0000000..b550af1
--- /dev/null
@@ -0,0 +1,66 @@
+[
+    {
+        "ArchStdEvent": "L1I_TLB_REFILL",
+        "PublicDescription": "Counts level 1 instruction TLB refills from any Instruction fetch. If there are multiple misses in the TLB that are resolved by the refill, then this event only counts once. This event will not count if the translation table walk results in a fault (such as a translation or access fault), since there is no new translation created for the TLB."
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL",
+        "PublicDescription": "Counts level 1 data TLB accesses that resulted in TLB refills. If there are multiple misses in the TLB that are resolved by the refill, then this event only counts once. This event counts for refills caused by preload instructions or hardware prefetch accesses. This event counts regardless of whether the miss hits in L2 or results in a translation table walk. This event will not count if the translation table walk results in a fault (such as a translation or access fault), since there is no new translation created for the TLB. This event will not count on an access from an Address Translation (AT) instruction."
+    },
+    {
+        "ArchStdEvent": "L1D_TLB",
+        "PublicDescription": "Counts level 1 data TLB accesses caused by any memory load or store operation. Note that load or store instructions can be broken up into multiple memory operations. This event does not count TLB maintenance operations."
+    },
+    {
+        "ArchStdEvent": "L1I_TLB",
+        "PublicDescription": "Counts level 1 instruction TLB accesses, whether the access hits or misses in the TLB. This event counts both demand accesses and prefetch or preload generated accesses."
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL",
+        "PublicDescription": "Counts level 2 TLB refills caused by memory operations from both data and instruction fetch, except for those caused by TLB maintenance operations and hardware prefetches."
+    },
+    {
+        "ArchStdEvent": "L2D_TLB",
+        "PublicDescription": "Counts level 2 TLB accesses except those caused by TLB maintenance operations."
+    },
+    {
+        "ArchStdEvent": "DTLB_WALK",
+        "PublicDescription": "Counts data memory translation table walks caused by a miss in the L2 TLB driven by a memory access. Note that partial translations that also cause a table walk are counted. This event does not count table walks caused by TLB maintenance operations."
+    },
+    {
+        "ArchStdEvent": "ITLB_WALK",
+        "PublicDescription": "Counts instruction memory translation table walks caused by a miss in the L2 TLB driven by a memory access. Partial translations that also cause a table walk are counted. This event does not count table walks caused by TLB maintenance operations."
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_RD",
+        "PublicDescription": "Counts level 1 data TLB refills caused by memory read operations. If there are multiple misses in the TLB that are resolved by the refill, then this event only counts once. This event counts for refills caused by preload instructions or hardware prefetch accesses. This event counts regardless of whether the miss hits in L2 or results in a translation table walk. This event will not count if the translation table walk results in a fault (such as a translation or access fault), since there is no new translation created for the TLB. This event will not count on an access from an Address Translation (AT) instruction."
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL_WR",
+        "PublicDescription": "Counts level 1 data TLB refills caused by data side memory write operations. If there are multiple misses in the TLB that are resolved by the refill, then this event only counts once. This event counts for refills caused by preload instructions or hardware prefetch accesses. This event counts regardless of whether the miss hits in L2 or results in a translation table walk. This event will not count if the translation table walk results in a fault (such as a translation or access fault), since there is no new translation created for the TLB. This event will not count on an access from an Address Translation (AT) instruction."
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_RD",
+        "PublicDescription": "Counts level 1 data TLB accesses caused by memory read operations. This event counts whether the access hits or misses in the TLB. This event does not count TLB maintenance operations."
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_WR",
+        "PublicDescription": "Counts any L1 data side TLB accesses caused by memory write operations. This event counts whether the access hits or misses in the TLB. This event does not count TLB maintenance operations."
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_RD",
+        "PublicDescription": "Counts level 2 TLB refills caused by memory read operations from both data and instruction fetch except for those caused by TLB maintenance operations or hardware prefetches."
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL_WR",
+        "PublicDescription": "Counts level 2 TLB refills caused by memory write operations from both data and instruction fetch except for those caused by TLB maintenance operations."
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_RD",
+        "PublicDescription": "Counts level 2 TLB accesses caused by memory read operations from both data and instruction fetch except for those caused by TLB maintenance operations."
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_WR",
+        "PublicDescription": "Counts level 2 TLB accesses caused by memory write operations from both data and instruction fetch except for those caused by TLB maintenance operations."
+    }
+]
index 3116135..98f6fab 100644 (file)
@@ -1,29 +1,38 @@
 [
     {
-        "ArchStdEvent": "TRB_WRAP"
+        "ArchStdEvent": "TRB_WRAP",
+        "PublicDescription": "This event is generated each time the current write pointer is wrapped to the base pointer."
     },
     {
-        "ArchStdEvent": "TRCEXTOUT0"
+        "ArchStdEvent": "TRCEXTOUT0",
+        "PublicDescription": "This event is generated each time an event is signaled by ETE external event 0."
     },
     {
-        "ArchStdEvent": "TRCEXTOUT1"
+        "ArchStdEvent": "TRCEXTOUT1",
+        "PublicDescription": "This event is generated each time an event is signaled by ETE external event 1."
     },
     {
-        "ArchStdEvent": "TRCEXTOUT2"
+        "ArchStdEvent": "TRCEXTOUT2",
+        "PublicDescription": "This event is generated each time an event is signaled by ETE external event 2."
     },
     {
-        "ArchStdEvent": "TRCEXTOUT3"
+        "ArchStdEvent": "TRCEXTOUT3",
+        "PublicDescription": "This event is generated each time an event is signaled by ETE external event 3."
     },
     {
-        "ArchStdEvent": "CTI_TRIGOUT4"
+        "ArchStdEvent": "CTI_TRIGOUT4",
+        "PublicDescription": "This event is generated each time an event is signaled on CTI output trigger 4."
     },
     {
-        "ArchStdEvent": "CTI_TRIGOUT5"
+        "ArchStdEvent": "CTI_TRIGOUT5",
+        "PublicDescription": "This event is generated each time an event is signaled on CTI output trigger 5."
     },
     {
-        "ArchStdEvent": "CTI_TRIGOUT6"
+        "ArchStdEvent": "CTI_TRIGOUT6",
+        "PublicDescription": "This event is generated each time an event is signaled on CTI output trigger 6."
     },
     {
-        "ArchStdEvent": "CTI_TRIGOUT7"
+        "ArchStdEvent": "CTI_TRIGOUT7",
+        "PublicDescription": "This event is generated each time an event is signaled on CTI output trigger 7."
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json
new file mode 100644 (file)
index 0000000..e21c469
--- /dev/null
@@ -0,0 +1,373 @@
+[
+       {
+               "BriefDescription": "A Write or Read Op at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x0",
+               "EventName": "hif_rd_or_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Write Op at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x1",
+               "EventName": "hif_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read Op at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x2",
+               "EventName": "hif_rd",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read-Modify-Write Op at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x3",
+               "EventName": "hif_rmw",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A high priority Read at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x4",
+               "EventName": "hif_hi_pri_rd",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A write data cycle at DFI interface (to DRAM).",
+               "ConfigCode": "0x7",
+               "EventName": "dfi_wr_data_cycles",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A read data cycle at DFI interface (to DRAM).",
+               "ConfigCode": "0x8",
+               "EventName": "dfi_rd_data_cycles",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A high priority read becomes critical.",
+               "ConfigCode": "0x9",
+               "EventName": "hpr_xact_when_critical",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A low priority read becomes critical.",
+               "ConfigCode": "0xA",
+               "EventName": "lpr_xact_when_critical",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A write becomes critical.",
+               "ConfigCode": "0xB",
+               "EventName": "wr_xact_when_critical",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "An Activate(ACT) command to DRAM.",
+               "ConfigCode": "0xC",
+               "EventName": "op_is_activate",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read or Write CAS command to DRAM.",
+               "ConfigCode": "0xD",
+               "EventName": "op_is_rd_or_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "An Activate(ACT) command for read to DRAM.",
+               "ConfigCode": "0xE",
+               "EventName": "op_is_rd_activate",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read CAS command to DRAM.",
+               "ConfigCode": "0xF",
+               "EventName": "op_is_rd",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Write CAS command to DRAM.",
+               "ConfigCode": "0x10",
+               "EventName": "op_is_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Masked Write command to DRAM.",
+               "ConfigCode": "0x11",
+               "EventName": "op_is_mwr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Precharge(PRE) command to DRAM.",
+               "ConfigCode": "0x12",
+               "EventName": "op_is_precharge",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Precharge(PRE) required by read or write.",
+               "ConfigCode": "0x13",
+               "EventName": "precharge_for_rdwr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Precharge(PRE) required by other conditions.",
+               "ConfigCode": "0x14",
+               "EventName": "precharge_for_other",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A read-write turnaround.",
+               "ConfigCode": "0x15",
+               "EventName": "rdwr_transitions",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A write combine(merge) in write data buffer.",
+               "ConfigCode": "0x16",
+               "EventName": "write_combine",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Write-After-Read hazard.",
+               "ConfigCode": "0x17",
+               "EventName": "war_hazard",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read-After-Write hazard.",
+               "ConfigCode": "0x18",
+               "EventName": "raw_hazard",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Write-After-Write hazard.",
+               "ConfigCode": "0x19",
+               "EventName": "waw_hazard",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank0 enters self-refresh(SRE).",
+               "ConfigCode": "0x1A",
+               "EventName": "op_is_enter_selfref_rk0",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank1 enters self-refresh(SRE).",
+               "ConfigCode": "0x1B",
+               "EventName": "op_is_enter_selfref_rk1",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank2 enters self-refresh(SRE).",
+               "ConfigCode": "0x1C",
+               "EventName": "op_is_enter_selfref_rk2",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank3 enters self-refresh(SRE).",
+               "ConfigCode": "0x1D",
+               "EventName": "op_is_enter_selfref_rk3",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank0 enters power-down(PDE).",
+               "ConfigCode": "0x1E",
+               "EventName": "op_is_enter_powerdown_rk0",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank1 enters power-down(PDE).",
+               "ConfigCode": "0x1F",
+               "EventName": "op_is_enter_powerdown_rk1",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank2 enters power-down(PDE).",
+               "ConfigCode": "0x20",
+               "EventName": "op_is_enter_powerdown_rk2",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank3 enters power-down(PDE).",
+               "ConfigCode": "0x21",
+               "EventName": "op_is_enter_powerdown_rk3",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A cycle that Rank0 stays in self-refresh mode.",
+               "ConfigCode": "0x26",
+               "EventName": "selfref_mode_rk0",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A cycle that Rank1 stays in self-refresh mode.",
+               "ConfigCode": "0x27",
+               "EventName": "selfref_mode_rk1",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A cycle that Rank2 stays in self-refresh mode.",
+               "ConfigCode": "0x28",
+               "EventName": "selfref_mode_rk2",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A cycle that Rank3 stays in self-refresh mode.",
+               "ConfigCode": "0x29",
+               "EventName": "selfref_mode_rk3",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "An auto-refresh(REF) command to DRAM.",
+               "ConfigCode": "0x2A",
+               "EventName": "op_is_refresh",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A critical auto-refresh(REF) command to DRAM.",
+               "ConfigCode": "0x2B",
+               "EventName": "op_is_crit_ref",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "An MRR or MRW command to DRAM.",
+               "ConfigCode": "0x2D",
+               "EventName": "op_is_load_mode",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A ZQCal command to DRAM.",
+               "ConfigCode": "0x2E",
+               "EventName": "op_is_zqcl",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "At least one entry in read queue reaches the visible window limit.",
+               "ConfigCode": "0x30",
+               "EventName": "visible_window_limit_reached_rd",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "At least one entry in write queue reaches the visible window limit.",
+               "ConfigCode": "0x31",
+               "EventName": "visible_window_limit_reached_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A DQS Oscillator MPC command to DRAM.",
+               "ConfigCode": "0x34",
+               "EventName": "op_is_dqsosc_mpc",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A DQS Oscillator MRR command to DRAM.",
+               "ConfigCode": "0x35",
+               "EventName": "op_is_dqsosc_mrr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Temperature Compensated Refresh(TCR) MRR command to DRAM.",
+               "ConfigCode": "0x36",
+               "EventName": "op_is_tcr_mrr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A ZQCal Start command to DRAM.",
+               "ConfigCode": "0x37",
+               "EventName": "op_is_zqstart",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A ZQCal Latch command to DRAM.",
+               "ConfigCode": "0x38",
+               "EventName": "op_is_zqlatch",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A packet at CHI TXREQ interface (request).",
+               "ConfigCode": "0x39",
+               "EventName": "chi_txreq",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A packet at CHI TXDAT interface (read data).",
+               "ConfigCode": "0x3A",
+               "EventName": "chi_txdat",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A packet at CHI RXDAT interface (write data).",
+               "ConfigCode": "0x3B",
+               "EventName": "chi_rxdat",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A packet at CHI RXRSP interface.",
+               "ConfigCode": "0x3C",
+               "EventName": "chi_rxrsp",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A violation detected in TZC.",
+               "ConfigCode": "0x3D",
+               "EventName": "tsz_vio",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "The ddr cycles.",
+               "ConfigCode": "0x80",
+               "EventName": "ddr_cycles",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json
new file mode 100644 (file)
index 0000000..bc865b3
--- /dev/null
@@ -0,0 +1,20 @@
+[
+       {
+               "MetricName": "ddr_read_bandwidth.all",
+               "BriefDescription": "The ddr read bandwidth(MB/s).",
+               "MetricGroup": "ali_drw",
+               "MetricExpr": "hif_rd * 64 / 1e6 / duration_time",
+               "ScaleUnit": "1MB/s",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "MetricName": "ddr_write_bandwidth.all",
+               "BriefDescription": "The ddr write bandwidth(MB/s).",
+               "MetricGroup": "ali_drw",
+               "MetricExpr": "(hif_wr + hif_rmw) * 64 / 1e6 / duration_time",
+               "ScaleUnit": "1MB/s",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       }
+]
index f90b338..4eed79a 100644 (file)
@@ -1,34 +1,34 @@
 [
     {
-        "MetricExpr": "stall_slot_frontend / (#slots * cpu_cycles)",
-        "BriefDescription": "Frontend bound L1 topdown metric",
+        "MetricExpr": "100 * (stall_slot_frontend / (#slots * cpu_cycles))",
+        "BriefDescription": "This metric is the percentage of total slots that were stalled due to resource constraints in the frontend of the processor.",
         "DefaultMetricgroupName": "TopdownL1",
         "MetricGroup": "Default;TopdownL1",
         "MetricName": "frontend_bound",
-        "ScaleUnit": "100%"
+        "ScaleUnit": "1percent of slots"
     },
     {
-        "MetricExpr": "(1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))",
-        "BriefDescription": "Bad speculation L1 topdown metric",
+        "MetricExpr": "100 * ((1 - op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles)))",
+        "BriefDescription": "This metric is the percentage of total slots that executed operations and didn't retire due to a pipeline flush.\nThis indicates cycles that were utilized but inefficiently.",
         "DefaultMetricgroupName": "TopdownL1",
         "MetricGroup": "Default;TopdownL1",
         "MetricName": "bad_speculation",
-        "ScaleUnit": "100%"
+        "ScaleUnit": "1percent of slots"
     },
     {
-        "MetricExpr": "(op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles))",
-        "BriefDescription": "Retiring L1 topdown metric",
+        "MetricExpr": "100 * ((op_retired / op_spec) * (1 - stall_slot / (#slots * cpu_cycles)))",
+        "BriefDescription": "This metric is the percentage of total slots that retired operations, which indicates cycles that were utilized efficiently.",
         "DefaultMetricgroupName": "TopdownL1",
         "MetricGroup": "Default;TopdownL1",
         "MetricName": "retiring",
-        "ScaleUnit": "100%"
+        "ScaleUnit": "1percent of slots"
     },
     {
-        "MetricExpr": "stall_slot_backend / (#slots * cpu_cycles)",
-        "BriefDescription": "Backend Bound L1 topdown metric",
+        "MetricExpr": "100 * (stall_slot_backend / (#slots * cpu_cycles))",
+        "BriefDescription": "This metric is the percentage of total slots that were stalled due to resource constraints in the backend of the processor.",
         "DefaultMetricgroupName": "TopdownL1",
         "MetricGroup": "Default;TopdownL1",
         "MetricName": "backend_bound",
-        "ScaleUnit": "100%"
+        "ScaleUnit": "1percent of slots"
     }
 ]
index 605be14..839ae26 100644 (file)
@@ -1,53 +1,8 @@
 [
   {
-    "EventCode": "0x1003C",
-    "EventName": "PM_EXEC_STALL_DMISS_L2L3",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
-  },
-  {
-    "EventCode": "0x1E054",
-    "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
-  },
-  {
-    "EventCode": "0x34054",
-    "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
-  },
-  {
-    "EventCode": "0x34056",
-    "EventName": "PM_EXEC_STALL_LOAD_FINISH",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
-  },
-  {
-    "EventCode": "0x3006C",
-    "EventName": "PM_RUN_CYC_SMT2_MODE",
-    "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
-  },
-  {
     "EventCode": "0x300F4",
     "EventName": "PM_RUN_INST_CMPL_CONC",
-    "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set."
-  },
-  {
-    "EventCode": "0x4C016",
-    "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
-  },
-  {
-    "EventCode": "0x4D014",
-    "EventName": "PM_EXEC_STALL_LOAD",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
-  },
-  {
-    "EventCode": "0x4D016",
-    "EventName": "PM_EXEC_STALL_PTESYNC",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
-  },
-  {
-    "EventCode": "0x401EA",
-    "EventName": "PM_THRESH_EXC_128",
-    "BriefDescription": "Threshold counter exceeded a value of 128."
+    "BriefDescription": "PowerPC instruction completed by this thread when all threads in the core had the run-latch set."
   },
   {
     "EventCode": "0x400F6",
index 54acb55..e816cd1 100644 (file)
@@ -1,7 +1,67 @@
 [
   {
-    "EventCode": "0x4016E",
-    "EventName": "PM_THRESH_NOT_MET",
-    "BriefDescription": "Threshold counter did not meet threshold."
+    "EventCode": "0x100F4",
+    "EventName": "PM_FLOP_CMPL",
+    "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
+  },
+  {
+    "EventCode": "0x45050",
+    "EventName": "PM_1FLOP_CMPL",
+    "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+  },
+  {
+    "EventCode": "0x45052",
+    "EventName": "PM_4FLOP_CMPL",
+    "BriefDescription": "Four floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+  },
+  {
+    "EventCode": "0x45054",
+    "EventName": "PM_FMA_CMPL",
+    "BriefDescription": "Two floating point instruction completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
+  },
+  {
+    "EventCode": "0x45056",
+    "EventName": "PM_SCALAR_FLOP_CMPL",
+    "BriefDescription": "Scalar floating point instruction completed."
+  },
+  {
+    "EventCode": "0x4505A",
+    "EventName": "PM_SP_FLOP_CMPL",
+    "BriefDescription": "Single Precision floating point instruction completed."
+  },
+  {
+    "EventCode": "0x4505C",
+    "EventName": "PM_MATH_FLOP_CMPL",
+    "BriefDescription": "Math floating point instruction completed."
+  },
+  {
+    "EventCode": "0x4D052",
+    "EventName": "PM_2FLOP_CMPL",
+    "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
+  },
+  {
+    "EventCode": "0x4D054",
+    "EventName": "PM_8FLOP_CMPL",
+    "BriefDescription": "Four Double Precision vector instruction completed."
+  },
+  {
+    "EventCode": "0x4D056",
+    "EventName": "PM_NON_FMA_FLOP_CMPL",
+    "BriefDescription": "Non FMA instruction completed."
+  },
+  {
+    "EventCode": "0x4D058",
+    "EventName": "PM_VECTOR_FLOP_CMPL",
+    "BriefDescription": "Vector floating point instruction completed."
+  },
+  {
+    "EventCode": "0x4D05A",
+    "EventName": "PM_NON_MATH_FLOP_CMPL",
+    "BriefDescription": "Non Math instruction completed."
+  },
+  {
+    "EventCode": "0x4D05C",
+    "EventName": "PM_DPP_FLOP_CMPL",
+    "BriefDescription": "Double-Precision or Quad-Precision instruction completed."
   }
 ]
index 558f953..5977f5e 100644 (file)
@@ -1,43 +1,13 @@
 [
   {
-    "EventCode": "0x10004",
-    "EventName": "PM_EXEC_STALL_TRANSLATION",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
+    "EventCode": "0x1D054",
+    "EventName": "PM_DTLB_HIT_2M",
+    "BriefDescription": "Data TLB hit (DERAT reload) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "0x10006",
-    "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
-  },
-  {
-    "EventCode": "0x10010",
-    "EventName": "PM_PMC4_OVERFLOW",
-    "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
-  },
-  {
-    "EventCode": "0x10020",
-    "EventName": "PM_PMC4_REWIND",
-    "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
-  },
-  {
-    "EventCode": "0x10038",
-    "EventName": "PM_DISP_STALL_TRANSLATION",
-    "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
-  },
-  {
-    "EventCode": "0x1003A",
-    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
-  },
-  {
-    "EventCode": "0x1D05E",
-    "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
-  },
-  {
-    "EventCode": "0x1E050",
-    "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
+    "EventCode": "0x1D058",
+    "EventName": "PM_ITLB_HIT_64K",
+    "BriefDescription": "Instruction TLB hit (IERAT reload) page size 64K. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
   },
   {
     "EventCode": "0x1F054",
     "BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT."
   },
   {
-    "EventCode": "0x10064",
-    "EventName": "PM_DISP_STALL_IC_L2",
-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
-  },
-  {
-    "EventCode": "0x101E8",
-    "EventName": "PM_THRESH_EXC_256",
-    "BriefDescription": "Threshold counter exceeded a count of 256."
-  },
-  {
-    "EventCode": "0x101EC",
-    "EventName": "PM_THRESH_MET",
-    "BriefDescription": "Threshold exceeded."
-  },
-  {
     "EventCode": "0x100F2",
     "EventName": "PM_1PLUS_PPC_CMPL",
     "BriefDescription": "Cycles in which at least one instruction is completed by this thread."
   {
     "EventCode": "0x100F6",
     "EventName": "PM_IERAT_MISS",
-    "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event."
-  },
-  {
-    "EventCode": "0x100F8",
-    "EventName": "PM_DISP_STALL_CYC",
-    "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
-  },
-  {
-    "EventCode": "0x20006",
-    "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
-  },
-  {
-    "EventCode": "0x20114",
-    "EventName": "PM_MRK_L2_RC_DISP",
-    "BriefDescription": "Marked instruction RC dispatched in L2."
-  },
-  {
-    "EventCode": "0x2C010",
-    "EventName": "PM_EXEC_STALL_LSU",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
-  },
-  {
-    "EventCode": "0x2C016",
-    "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
-    "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
-  },
-  {
-    "EventCode": "0x2C01E",
-    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
-  },
-  {
-    "EventCode": "0x2D01A",
-    "EventName": "PM_DISP_STALL_IC_MISS",
-    "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss."
-  },
-  {
-    "EventCode": "0x2E018",
-    "EventName": "PM_DISP_STALL_FETCH",
-    "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
-  },
-  {
-    "EventCode": "0x2E01A",
-    "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full."
-  },
-  {
-    "EventCode": "0x2C142",
-    "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
-    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+    "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event. This event only counts instruction demand access."
   },
   {
     "EventCode": "0x24050",
     "BriefDescription": "Branch Taken instruction completed."
   },
   {
-    "EventCode": "0x30004",
-    "EventName": "PM_DISP_STALL_FLUSH",
-    "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
-  },
-  {
     "EventCode": "0x3000A",
     "EventName": "PM_DISP_STALL_ITLB_MISS",
     "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
     "BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush."
   },
   {
-    "EventCode": "0x30014",
-    "EventName": "PM_EXEC_STALL_STORE",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
-  },
-  {
-    "EventCode": "0x30018",
-    "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
-  },
-  {
-    "EventCode": "0x30026",
-    "EventName": "PM_EXEC_STALL_STORE_MISS",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
-  },
-  {
-    "EventCode": "0x3012A",
-    "EventName": "PM_MRK_L2_RC_DONE",
-    "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
-  },
-  {
     "EventCode": "0x3F046",
     "EventName": "PM_ITLB_HIT_1G",
     "BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "0x34058",
-    "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
-    "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
-  },
-  {
-    "EventCode": "0x3D05C",
-    "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
-  },
-  {
-    "EventCode": "0x3E052",
-    "EventName": "PM_DISP_STALL_IC_L3",
-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
+    "EventCode": "0x3C05A",
+    "EventName": "PM_DTLB_HIT_64K",
+    "BriefDescription": "Data TLB hit (DERAT reload) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
     "EventCode": "0x3E054",
     "EventName": "PM_LD_MISS_L1",
-    "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
-  },
-  {
-    "EventCode": "0x301EA",
-    "EventName": "PM_THRESH_EXC_1024",
-    "BriefDescription": "Threshold counter exceeded a value of 1024."
+    "BriefDescription": "Load missed L1, counted at finish time. LMQ merges are not included in this count (i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
   },
   {
     "EventCode": "0x300FA",
     "EventName": "PM_INST_FROM_L3MISS",
-    "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
+    "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss."
   },
   {
     "EventCode": "0x40006",
     "BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group."
   },
   {
-    "EventCode": "0x40116",
-    "EventName": "PM_MRK_LARX_FIN",
-    "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
-  },
-  {
-    "EventCode": "0x4C010",
-    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
-  },
-  {
-    "EventCode": "0x4D01E",
-    "EventName": "PM_DISP_STALL_BR_MPRED",
-    "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
-  },
-  {
-    "EventCode": "0x4E010",
-    "EventName": "PM_DISP_STALL_IC_L3MISS",
-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
-  },
-  {
-    "EventCode": "0x4E01A",
-    "EventName": "PM_DISP_STALL_HELD_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason."
+    "EventCode": "0x44056",
+    "EventName": "PM_VECTOR_ST_CMPL",
+    "BriefDescription": "Vector store instruction completed."
   },
   {
-    "EventCode": "0x4003C",
-    "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+    "EventCode": "0x4E054",
+    "EventName": "PM_DTLB_HIT_1G",
+    "BriefDescription": "Data TLB hit (DERAT reload) page size 1G. Implies radix translation. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "0x44056",
-    "EventName": "PM_VECTOR_ST_CMPL",
-    "BriefDescription": "Vector store instructions completed."
+    "EventCode": "0x400FC",
+    "EventName": "PM_ITLB_MISS",
+    "BriefDescription": "Instruction TLB reload (after a miss), all page sizes. Includes only demand misses."
   }
 ]
index 58b5dfe..78f71a9 100644 (file)
@@ -1,15 +1,35 @@
 [
   {
-    "EventCode": "0x1002C",
-    "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
-    "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
-  },
-  {
     "EventCode": "0x10132",
     "EventName": "PM_MRK_INST_ISSUED",
     "BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction."
   },
   {
+    "EventCode": "0x10134",
+    "EventName": "PM_MRK_ST_DONE_L2",
+    "BriefDescription": "Marked store completed in L2."
+  },
+  {
+    "EventCode": "0x1C142",
+    "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
+    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+  },
+  {
+    "EventCode": "0x1C144",
+    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
+    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
+  },
+  {
+    "EventCode": "0x1D15C",
+    "EventName": "PM_MRK_DTLB_MISS_1G",
+    "BriefDescription": "Marked Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+  },
+  {
+    "EventCode": "0x1F150",
+    "EventName": "PM_MRK_ST_L2_CYC",
+    "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
+  },
+  {
     "EventCode": "0x101E0",
     "EventName": "PM_MRK_INST_DISP",
     "BriefDescription": "The thread has dispatched a randomly sampled marked instruction."
     "BriefDescription": "Marked Branch Taken instruction completed."
   },
   {
-    "EventCode": "0x20112",
-    "EventName": "PM_MRK_NTF_FIN",
-    "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch."
+    "EventCode": "0x101E4",
+    "EventName": "PM_MRK_L1_ICACHE_MISS",
+    "BriefDescription": "Marked instruction suffered an instruction cache miss."
+  },
+  {
+    "EventCode": "0x101EA",
+    "EventName": "PM_MRK_L1_RELOAD_VALID",
+    "BriefDescription": "Marked demand reload."
   },
   {
-    "EventCode": "0x2C01C",
-    "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
+    "EventCode": "0x20114",
+    "EventName": "PM_MRK_L2_RC_DISP",
+    "BriefDescription": "Marked instruction RC dispatched in L2."
+  },
+  {
+    "EventCode": "0x2011C",
+    "EventName": "PM_MRK_NTF_CYC",
+    "BriefDescription": "Cycles in which the marked instruction is the oldest in the pipeline (next-to-finish or next-to-complete)."
+  },
+  {
+    "EventCode": "0x20130",
+    "EventName": "PM_MRK_INST_DECODED",
+    "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
+  },
+  {
+    "EventCode": "0x20132",
+    "EventName": "PM_MRK_DFU_ISSUE",
+    "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
+  },
+  {
+    "EventCode": "0x20134",
+    "EventName": "PM_MRK_FXU_ISSUE",
+    "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
   },
   {
     "EventCode": "0x20138",
     "BriefDescription": "Marked Branch instruction finished."
   },
   {
+    "EventCode": "0x2013C",
+    "EventName": "PM_MRK_FX_LSU_FIN",
+    "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
+  },
+  {
+    "EventCode": "0x2C142",
+    "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
+    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+  },
+  {
     "EventCode": "0x2C144",
     "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2",
     "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]."
     "BriefDescription": "A marked branch completed. All branches are included."
   },
   {
-    "EventCode": "0x200FD",
-    "EventName": "PM_L1_ICACHE_MISS",
-    "BriefDescription": "Demand iCache Miss."
+    "EventCode": "0x2D154",
+    "EventName": "PM_MRK_DERAT_MISS_64K",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+  },
+  {
+    "EventCode": "0x201E0",
+    "EventName": "PM_MRK_DATA_FROM_MEMORY",
+    "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
   },
   {
-    "EventCode": "0x30130",
-    "EventName": "PM_MRK_INST_FIN",
-    "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU."
+    "EventCode": "0x201E2",
+    "EventName": "PM_MRK_LD_MISS_L1",
+    "BriefDescription": "Marked demand data load miss counted at finish time."
+  },
+  {
+    "EventCode": "0x201E4",
+    "EventName": "PM_MRK_DATA_FROM_L3MISS",
+    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
+  },
+  {
+    "EventCode": "0x3012A",
+    "EventName": "PM_MRK_L2_RC_DONE",
+    "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
+  },
+  {
+    "EventCode": "0x3012E",
+    "EventName": "PM_MRK_DTLB_MISS_2M",
+    "BriefDescription": "Marked Data TLB reload (after a miss) page size 2M, which implies Radix Page Table translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+  },
+  {
+    "EventCode": "0x30132",
+    "EventName": "PM_MRK_VSU_FIN",
+    "BriefDescription": "VSU marked instruction finished. Excludes simple FX instructions issued to the Store Unit."
   },
   {
     "EventCode": "0x34146",
     "EventName": "PM_MRK_LD_CMPL",
-    "BriefDescription": "Marked loads completed."
+    "BriefDescription": "Marked load instruction completed."
+  },
+  {
+    "EventCode": "0x3C142",
+    "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
+    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
+  },
+  {
+    "EventCode": "0x3C144",
+    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
+    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
   },
   {
     "EventCode": "0x3E158",
   {
     "EventCode": "0x3E15A",
     "EventName": "PM_MRK_ST_FIN",
-    "BriefDescription": "The marked instruction was a store of any kind."
+    "BriefDescription": "Marked store instruction finished."
+  },
+  {
+    "EventCode": "0x3F150",
+    "EventName": "PM_MRK_ST_DRAIN_CYC",
+    "BriefDescription": "Cycles in which the marked store drained from the core to the L2."
   },
   {
-    "EventCode": "0x30068",
-    "EventName": "PM_L1_ICACHE_RELOADED_PREF",
-    "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)."
+    "EventCode": "0x30162",
+    "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
+    "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
+  },
+  {
+    "EventCode": "0x301E2",
+    "EventName": "PM_MRK_ST_CMPL",
+    "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
   },
   {
     "EventCode": "0x301E4",
     "BriefDescription": "Marked Branch Mispredicted. Includes direction and target."
   },
   {
-    "EventCode": "0x300F6",
-    "EventName": "PM_LD_DEMAND_MISS_L1",
-    "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
+    "EventCode": "0x301E6",
+    "EventName": "PM_MRK_DERAT_MISS",
+    "BriefDescription": "Marked ERAT Miss (Data TLB Access) All page sizes. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+  },
+  {
+    "EventCode": "0x4010E",
+    "EventName": "PM_MRK_TLBIE_FIN",
+    "BriefDescription": "Marked TLBIE instruction finished. Includes TLBIE and TLBIEL instructions."
+  },
+  {
+    "EventCode": "0x40116",
+    "EventName": "PM_MRK_LARX_FIN",
+    "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
+  },
+  {
+    "EventCode": "0x40132",
+    "EventName": "PM_MRK_LSU_FIN",
+    "BriefDescription": "LSU marked instruction finished."
+  },
+  {
+    "EventCode": "0x44146",
+    "EventName": "PM_MRK_STCX_CORE_CYC",
+    "BriefDescription": "Cycles spent in the core portion of a marked STCX instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
   },
   {
-    "EventCode": "0x300FE",
-    "EventName": "PM_DATA_FROM_L3MISS",
-    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
+    "EventCode": "0x4C142",
+    "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
+    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "0x40012",
-    "EventName": "PM_L1_ICACHE_RELOADED_ALL",
-    "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
+    "EventCode": "0x4C144",
+    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
+    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
   },
   {
-    "EventCode": "0x40134",
-    "EventName": "PM_MRK_INST_TIMEO",
-    "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
+    "EventCode": "0x4C15C",
+    "EventName": "PM_MRK_DERAT_MISS_1G",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "0x4505A",
-    "EventName": "PM_SP_FLOP_CMPL",
-    "BriefDescription": "Single Precision floating point instructions completed."
+    "EventCode": "0x4C15E",
+    "EventName": "PM_MRK_DTLB_MISS_64K",
+    "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "0x4D058",
-    "EventName": "PM_VECTOR_FLOP_CMPL",
-    "BriefDescription": "Vector floating point instructions completed."
+    "EventCode": "0x4E15E",
+    "EventName": "PM_MRK_INST_FLUSHED",
+    "BriefDescription": "The marked instruction was flushed."
   },
   {
-    "EventCode": "0x4D05A",
-    "EventName": "PM_NON_MATH_FLOP_CMPL",
-    "BriefDescription": "Non Math instructions completed."
+    "EventCode": "0x40164",
+    "EventName": "PM_MRK_DERAT_MISS_2M",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
     "EventCode": "0x401E0",
     "EventName": "PM_MRK_INST_CMPL",
-    "BriefDescription": "marked instruction completed."
+    "BriefDescription": "Marked instruction completed."
+  },
+  {
+    "EventCode": "0x401E4",
+    "EventName": "PM_MRK_DTLB_MISS",
+    "BriefDescription": "The DPTEG required for the marked load/store instruction in execution was missing from the TLB. This event only counts for demand misses."
+  },
+  {
+    "EventCode": "0x401E6",
+    "EventName": "PM_MRK_INST_FROM_L3MISS",
+    "BriefDescription": "The processor's instruction cache was reloaded from beyond the local core's L3 due to a demand miss for a marked instruction."
   },
   {
-    "EventCode": "0x400FE",
-    "EventName": "PM_DATA_FROM_MEMORY",
-    "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
+    "EventCode": "0x401E8",
+    "EventName": "PM_MRK_DATA_FROM_L2MISS",
+    "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss for a marked instruction."
   }
 ]
index 843b51f..8852629 100644 (file)
@@ -1,25 +1,10 @@
 [
   {
-    "EventCode": "0x1000A",
-    "EventName": "PM_PMC3_REWIND",
-    "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
-  },
-  {
     "EventCode": "0x1C040",
     "EventName": "PM_XFER_FROM_SRC_PMC1",
     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "0x1C142",
-    "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
-    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
-  },
-  {
-    "EventCode": "0x1C144",
-    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
-    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
-  },
-  {
     "EventCode": "0x1C056",
     "EventName": "PM_DERAT_MISS_4K",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
     "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "0x1E056",
-    "EventName": "PM_EXEC_STALL_STORE_PIPE",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
-  },
-  {
-    "EventCode": "0x1F150",
-    "EventName": "PM_MRK_ST_L2_CYC",
-    "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
-  },
-  {
     "EventCode": "0x10062",
     "EventName": "PM_LD_L3MISS_PEND_CYC",
-    "BriefDescription": "Cycles L3 miss was pending for this thread."
-  },
-  {
-    "EventCode": "0x20010",
-    "EventName": "PM_PMC1_OVERFLOW",
-    "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
+    "BriefDescription": "Cycles in which an L3 miss was pending for this thread."
   },
   {
     "EventCode": "0x2001A",
@@ -80,9 +50,9 @@
     "BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "0x2D154",
-    "EventName": "PM_MRK_DERAT_MISS_64K",
-    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
+    "EventCode": "0x2C05A",
+    "EventName": "PM_DERAT_MISS_1G",
+    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 1G. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
     "EventCode": "0x200F6",
@@ -90,9 +60,9 @@
     "BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "0x30016",
-    "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
+    "EventCode": "0x34044",
+    "EventName": "PM_DERAT_MISS_PREF",
+    "BriefDescription": "DERAT miss (TLB access) while servicing a data prefetch."
   },
   {
     "EventCode": "0x3C040",
     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "0x3C142",
-    "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
-    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
-  },
-  {
-    "EventCode": "0x3C144",
-    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
-    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
-  },
-  {
     "EventCode": "0x3C054",
     "EventName": "PM_DERAT_MISS_16M",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
     "BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
   },
   {
-    "EventCode": "0x301E2",
-    "EventName": "PM_MRK_ST_CMPL",
-    "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
-  },
-  {
     "EventCode": "0x300FC",
     "EventName": "PM_DTLB_MISS",
-    "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity."
-  },
-  {
-    "EventCode": "0x4D02C",
-    "EventName": "PM_PMC1_REWIND",
-    "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
+    "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. This event only counts for demand misses."
   },
   {
     "EventCode": "0x4003E",
     "EventName": "PM_LD_CMPL",
-    "BriefDescription": "Loads completed."
+    "BriefDescription": "Load instruction completed."
   },
   {
     "EventCode": "0x4C040",
     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "0x4C142",
-    "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
-    "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
-  },
-  {
-    "EventCode": "0x4C144",
-    "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
-    "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
-  },
-  {
     "EventCode": "0x4C056",
     "EventName": "PM_DTLB_MISS_16M",
     "BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
     "EventCode": "0x4C05A",
     "EventName": "PM_DTLB_MISS_1G",
     "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
-  },
-  {
-    "EventCode": "0x4C15E",
-    "EventName": "PM_MRK_DTLB_MISS_64K",
-    "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
-  },
-  {
-    "EventCode": "0x4D056",
-    "EventName": "PM_NON_FMA_FLOP_CMPL",
-    "BriefDescription": "Non FMA instruction completed."
-  },
-  {
-    "EventCode": "0x40164",
-    "EventName": "PM_MRK_DERAT_MISS_2M",
-    "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   }
 ]
index 6f53583..4d66b75 100644 (file)
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason",
         "MetricExpr": "PM_DISP_STALL_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI;CPI_STALL_RATIO",
-        "MetricName": "DISPATCHED_CPI"
+        "MetricName": "DISPATCH_STALL_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because there was a flush",
         "MetricExpr": "PM_DISP_STALL_FLUSH / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_FLUSH_CPI"
+        "MetricName": "DISPATCH_STALL_FLUSH_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because fetch was being held, so there was nothing in the pipeline for this thread",
+        "MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCH_STALL_FETCH_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because the MMU was handling a translation miss",
         "MetricExpr": "PM_DISP_STALL_TRANSLATION / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_TRANSLATION_CPI"
+        "MetricName": "DISPATCH_STALL_TRANSLATION_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction ERAT miss",
         "MetricExpr": "PM_DISP_STALL_IERAT_ONLY_MISS / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_IERAT_ONLY_MISS_CPI"
+        "MetricName": "DISPATCH_STALL_IERAT_ONLY_MISS_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction TLB miss",
         "MetricExpr": "PM_DISP_STALL_ITLB_MISS / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_ITLB_MISS_CPI"
+        "MetricName": "DISPATCH_STALL_ITLB_MISS_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss",
         "MetricExpr": "PM_DISP_STALL_IC_MISS / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_IC_MISS_CPI"
+        "MetricName": "DISPATCH_STALL_IC_MISS_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L2",
         "MetricExpr": "PM_DISP_STALL_IC_L2 / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_IC_L2_CPI"
+        "MetricName": "DISPATCH_STALL_IC_L2_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L3",
         "MetricExpr": "PM_DISP_STALL_IC_L3 / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_IC_L3_CPI"
+        "MetricName": "DISPATCH_STALL_IC_L3_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from any source beyond the local L3",
         "MetricExpr": "PM_DISP_STALL_IC_L3MISS / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_IC_L3MISS_CPI"
+        "MetricName": "DISPATCH_STALL_IC_L3MISS_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss after a branch mispredict",
         "MetricExpr": "PM_DISP_STALL_BR_MPRED_ICMISS / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_BR_MPRED_ICMISS_CPI"
+        "MetricName": "DISPATCH_STALL_BR_MPRED_ICMISS_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L2 after suffering a branch mispredict",
         "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L2 / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_BR_MPRED_IC_L2_CPI"
+        "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L2_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L3 after suffering a branch mispredict",
         "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3 / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_BR_MPRED_IC_L3_CPI"
+        "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L3_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from any source beyond the local L3 after suffering a branch mispredict",
         "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3MISS / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_BR_MPRED_IC_L3MISS_CPI"
+        "MetricName": "DISPATCH_STALL_BR_MPRED_IC_L3MISS_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to a branch mispredict",
         "MetricExpr": "PM_DISP_STALL_BR_MPRED / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_BR_MPRED_CPI"
+        "MetricName": "DISPATCH_STALL_BR_MPRED_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any reason",
         "MetricExpr": "PM_DISP_STALL_HELD_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_HELD_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch",
         "MetricExpr": "PM_DISP_STALL_HELD_SYNC_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISP_HELD_STALL_SYNC_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_SYNC_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch while waiting on the scoreboard",
         "MetricExpr": "PM_DISP_STALL_HELD_SCOREBOARD_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISP_HELD_STALL_SCOREBOARD_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_SCOREBOARD_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch due to issue queue full",
         "MetricExpr": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISP_HELD_STALL_ISSQ_FULL_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_ISSQ_FULL_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the mapper/SRB was full",
         "MetricExpr": "PM_DISP_STALL_HELD_RENAME_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_HELD_RENAME_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_RENAME_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the STF mapper/SRB was full",
         "MetricExpr": "PM_DISP_STALL_HELD_STF_MAPPER_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_HELD_STF_MAPPER_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_STF_MAPPER_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the XVFC mapper/SRB was full",
         "MetricExpr": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_HELD_XVFC_MAPPER_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_XVFC_MAPPER_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any other reason",
         "MetricExpr": "PM_DISP_STALL_HELD_OTHER_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_HELD_OTHER_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_OTHER_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason",
         "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because fetch was being held, so there was nothing in the pipeline for this thread",
         "MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_FETCH_CPI"
+        "MetricName": "DISPATCH_STALL_FETCH_CPI"
     },
     {
         "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of power management",
         "MetricExpr": "PM_DISP_STALL_HELD_HALT_CYC / PM_RUN_INST_CMPL",
         "MetricGroup": "CPI",
-        "MetricName": "DISPATCHED_HELD_HALT_CPI"
+        "MetricName": "DISPATCH_STALL_HELD_HALT_CPI"
     },
     {
         "BriefDescription": "Percentage of flushes per completed instruction",
         "ScaleUnit": "1%"
     },
     {
+        "BriefDescription": "Percentage of completed instructions that were stores that missed the L1",
+        "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL",
+        "MetricGroup": "Others",
+        "MetricName": "L1_ST_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
         "BriefDescription": "Percentage of completed instructions when the DPTEG required for the load/store instruction in execution was missing from the TLB",
         "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL * 100",
         "MetricGroup": "Others",
         "MetricName": "LOADS_PER_INST"
     },
     {
-        "BriefDescription": "Average number of finished stores per completed instruction",
-        "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
-        "MetricGroup": "General",
-        "MetricName": "STORES_PER_INST"
-    },
-    {
         "BriefDescription": "Percentage of demand loads that reloaded from beyond the L2 per completed instruction",
         "MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100",
         "MetricGroup": "dL1_Reloads",
         "ScaleUnit": "1%"
     },
     {
+        "BriefDescription": "Percentage of ITLB misses per completed run instruction",
+        "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "General",
+        "MetricName": "ITLB_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
         "BriefDescription": "Percentage of DERAT misses with 4k page size per completed instruction",
         "MetricExpr": "PM_DERAT_MISS_4K / PM_RUN_INST_CMPL * 100",
         "MetricGroup": "Translation",
         "BriefDescription": "Average number of STCX instructions finshed per completed instruction",
         "MetricExpr": "PM_STCX_FIN / PM_RUN_INST_CMPL",
         "MetricGroup": "General",
-        "MetricName": "STXC_PER_INST"
+        "MetricName": "STCX_PER_INST"
     },
     {
         "BriefDescription": "Average number of LARX instructions finshed per completed instruction",
         "ScaleUnit": "1%"
     },
     {
+        "BriefDescription": "Percentage of DERAT misses with 1G page size per completed run instruction",
+        "MetricExpr": "PM_DERAT_MISS_1G * 100 / PM_RUN_INST_CMPL",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_1G_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
         "BriefDescription": "DERAT miss ratio for 4K page size",
         "MetricExpr": "PM_DERAT_MISS_4K / PM_DERAT_MISS",
         "MetricGroup": "Translation",
         "MetricName": "DERAT_16M_MISS_RATIO"
     },
     {
+        "BriefDescription": "DERAT miss ratio for 1G page size",
+        "MetricExpr": "PM_DERAT_MISS_1G / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_1G_MISS_RATIO"
+    },
+    {
         "BriefDescription": "DERAT miss ratio for 64K page size",
         "MetricExpr": "PM_DERAT_MISS_64K / PM_DERAT_MISS",
         "MetricGroup": "Translation",
index a771e4b..0e21e7b 100644 (file)
@@ -1,28 +1,13 @@
 [
   {
-    "EventCode": "0x10016",
-    "EventName": "PM_VSU0_ISSUE",
-    "BriefDescription": "VSU instructions issued to VSU pipe 0."
-  },
-  {
-    "EventCode": "0x1001C",
-    "EventName": "PM_ULTRAVISOR_INST_CMPL",
-    "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state."
-  },
-  {
-    "EventCode": "0x100F0",
-    "EventName": "PM_CYC",
-    "BriefDescription": "Processor cycles."
-  },
-  {
-    "EventCode": "0x10134",
-    "EventName": "PM_MRK_ST_DONE_L2",
-    "BriefDescription": "Marked stores completed in L2 (RC machine done)."
+    "EventCode": "0x1002C",
+    "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
+    "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
   },
   {
     "EventCode": "0x1505E",
     "EventName": "PM_LD_HIT_L1",
-    "BriefDescription": "Loads that finished without experiencing an L1 miss."
+    "BriefDescription": "Load finished without experiencing an L1 miss."
   },
   {
     "EventCode": "0x1F056",
@@ -30,9 +15,9 @@
     "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
   },
   {
-    "EventCode": "0x1F15C",
-    "EventName": "PM_MRK_STCX_L2_CYC",
-    "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)."
+    "EventCode": "0x1F05A",
+    "EventName": "PM_DISP_HELD_SYNC_CYC",
+    "BriefDescription": "Cycles dispatch is held because of a synchronizing instruction that requires the ICT to be empty before dispatch."
   },
   {
     "EventCode": "0x10066",
     "BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011."
   },
   {
-    "EventCode": "0x101E4",
-    "EventName": "PM_MRK_L1_ICACHE_MISS",
-    "BriefDescription": "Marked Instruction suffered an icache Miss."
-  },
-  {
-    "EventCode": "0x101EA",
-    "EventName": "PM_MRK_L1_RELOAD_VALID",
-    "BriefDescription": "Marked demand reload."
-  },
-  {
-    "EventCode": "0x100F4",
-    "EventName": "PM_FLOP_CMPL",
-    "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
-  },
-  {
-    "EventCode": "0x100FA",
-    "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
-    "BriefDescription": "Cycles when at least one thread has the run latch set."
-  },
-  {
     "EventCode": "0x100FC",
     "EventName": "PM_LD_REF_L1",
     "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
   },
   {
-    "EventCode": "0x2000C",
-    "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
-    "BriefDescription": "Cycles when the run latch is set for all threads."
-  },
-  {
     "EventCode": "0x2E010",
     "EventName": "PM_ADJUNCT_INST_CMPL",
-    "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state."
+    "BriefDescription": "PowerPC instruction completed while the thread was in Adjunct state."
   },
   {
     "EventCode": "0x2E014",
     "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
   },
   {
-    "EventCode": "0x20130",
-    "EventName": "PM_MRK_INST_DECODED",
-    "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
-  },
-  {
-    "EventCode": "0x20132",
-    "EventName": "PM_MRK_DFU_ISSUE",
-    "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
-  },
-  {
-    "EventCode": "0x20134",
-    "EventName": "PM_MRK_FXU_ISSUE",
-    "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
-  },
-  {
-    "EventCode": "0x2505C",
-    "EventName": "PM_VSU_ISSUE",
-    "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
-  },
-  {
     "EventCode": "0x2F054",
     "EventName": "PM_DISP_SS1_2_INSTR_CYC",
     "BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions."
     "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
   },
   {
-    "EventCode": "0x2006C",
-    "EventName": "PM_RUN_CYC_SMT4_MODE",
-    "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
-  },
-  {
-    "EventCode": "0x201E0",
-    "EventName": "PM_MRK_DATA_FROM_MEMORY",
-    "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
-  },
-  {
-    "EventCode": "0x201E4",
-    "EventName": "PM_MRK_DATA_FROM_L3MISS",
-    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
-  },
-  {
-    "EventCode": "0x201E8",
-    "EventName": "PM_THRESH_EXC_512",
-    "BriefDescription": "Threshold counter exceeded a value of 512."
-  },
-  {
     "EventCode": "0x200F2",
     "EventName": "PM_INST_DISP",
-    "BriefDescription": "PowerPC instructions dispatched."
-  },
-  {
-    "EventCode": "0x30132",
-    "EventName": "PM_MRK_VSU_FIN",
-    "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit."
+    "BriefDescription": "PowerPC instruction dispatched."
   },
   {
-    "EventCode": "0x30038",
-    "EventName": "PM_EXEC_STALL_DMISS_LMEM",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory."
+    "EventCode": "0x200FD",
+    "EventName": "PM_L1_ICACHE_MISS",
+    "BriefDescription": "Demand instruction cache miss."
   },
   {
     "EventCode": "0x3F04A",
   {
     "EventCode": "0x3405A",
     "EventName": "PM_PRIVILEGED_INST_CMPL",
-    "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state."
-  },
-  {
-    "EventCode": "0x3F150",
-    "EventName": "PM_MRK_ST_DRAIN_CYC",
-    "BriefDescription": "cycles to drain st from core to L2."
+    "BriefDescription": "PowerPC instruction completed while the thread was in Privileged state."
   },
   {
     "EventCode": "0x3F054",
     "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
   },
   {
-    "EventCode": "0x30162",
-    "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
-    "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
-  },
-  {
-    "EventCode": "0x40114",
-    "EventName": "PM_MRK_START_PROBE_NOP_DISP",
-    "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
-  },
-  {
-    "EventCode": "0x4001C",
-    "EventName": "PM_VSU_FIN",
-    "BriefDescription": "VSU instructions finished."
-  },
-  {
-    "EventCode": "0x4C01A",
-    "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
-  },
-  {
-    "EventCode": "0x4D012",
-    "EventName": "PM_PMC3_SAVED",
-    "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
-  },
-  {
-    "EventCode": "0x4D022",
-    "EventName": "PM_HYPERVISOR_INST_CMPL",
-    "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state."
-  },
-  {
-    "EventCode": "0x4D026",
-    "EventName": "PM_ULTRAVISOR_CYC",
-    "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
+    "EventCode": "0x30068",
+    "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+    "BriefDescription": "Counts all instruction cache prefetch reloads (includes demand turned into prefetch)."
   },
   {
-    "EventCode": "0x4D028",
-    "EventName": "PM_PRIVILEGED_CYC",
-    "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
+    "EventCode": "0x300F6",
+    "EventName": "PM_LD_DEMAND_MISS_L1",
+    "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
   },
   {
-    "EventCode": "0x40030",
-    "EventName": "PM_INST_FIN",
-    "BriefDescription": "Instructions finished."
+    "EventCode": "0x300FE",
+    "EventName": "PM_DATA_FROM_L3MISS",
+    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
   },
   {
-    "EventCode": "0x44146",
-    "EventName": "PM_MRK_STCX_CORE_CYC",
-    "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
+    "EventCode": "0x40012",
+    "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+    "BriefDescription": "Counts all instruction cache reloads, including demand, prefetch, prefetch turned into demand, and demand turned into prefetch."
   },
   {
     "EventCode": "0x44054",
     "EventName": "PM_VECTOR_LD_CMPL",
-    "BriefDescription": "Vector load instructions completed."
-  },
-  {
-    "EventCode": "0x45054",
-    "EventName": "PM_FMA_CMPL",
-    "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
-  },
-  {
-    "EventCode": "0x45056",
-    "EventName": "PM_SCALAR_FLOP_CMPL",
-    "BriefDescription": "Scalar floating point instructions completed."
-  },
-  {
-    "EventCode": "0x4505C",
-    "EventName": "PM_MATH_FLOP_CMPL",
-    "BriefDescription": "Math floating point instructions completed."
+    "BriefDescription": "Vector load instruction completed."
   },
   {
     "EventCode": "0x4D05E",
     "BriefDescription": "A branch completed. All branches are included."
   },
   {
-    "EventCode": "0x4E15E",
-    "EventName": "PM_MRK_INST_FLUSHED",
-    "BriefDescription": "The marked instruction was flushed."
-  },
-  {
-    "EventCode": "0x401E6",
-    "EventName": "PM_MRK_INST_FROM_L3MISS",
-    "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction."
-  },
-  {
-    "EventCode": "0x401E8",
-    "EventName": "PM_MRK_DATA_FROM_L2MISS",
-    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load."
-  },
-  {
     "EventCode": "0x400F0",
     "EventName": "PM_LD_DEMAND_MISS_L1_FIN",
-    "BriefDescription": "Load Missed L1, counted at finish time."
+    "BriefDescription": "Load missed L1, counted at finish time."
   },
   {
-    "EventCode": "0x500FA",
-    "EventName": "PM_RUN_INST_CMPL",
-    "BriefDescription": "Completed PowerPC instructions gated by the run latch."
+    "EventCode": "0x400FE",
+    "EventName": "PM_DATA_FROM_MEMORY",
+    "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
   }
 ]
index b8aded6..21b23bb 100644 (file)
@@ -1,8 +1,13 @@
 [
   {
-    "EventCode": "0x100FE",
-    "EventName": "PM_INST_CMPL",
-    "BriefDescription": "PowerPC instructions completed."
+    "EventCode": "0x10004",
+    "EventName": "PM_EXEC_STALL_TRANSLATION",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
+  },
+  {
+    "EventCode": "0x10006",
+    "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any other reason."
   },
   {
     "EventCode": "0x1000C",
@@ -12,7 +17,7 @@
   {
     "EventCode": "0x1000E",
     "EventName": "PM_MMA_ISSUED",
-    "BriefDescription": "MMA instructions issued."
+    "BriefDescription": "MMA instruction issued."
   },
   {
     "EventCode": "0x10012",
     "BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss."
   },
   {
-    "EventCode": "0x10022",
-    "EventName": "PM_PMC2_SAVED",
-    "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
+    "EventCode": "0x10028",
+    "EventName": "PM_NTC_FLUSH",
+    "BriefDescription": "The instruction was flushed after becoming next-to-complete (NTC)."
+  },
+  {
+    "EventCode": "0x10038",
+    "EventName": "PM_DISP_STALL_TRANSLATION",
+    "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
+  },
+  {
+    "EventCode": "0x1003A",
+    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
+    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
   },
   {
-    "EventCode": "0x10024",
-    "EventName": "PM_PMC5_OVERFLOW",
-    "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
+    "EventCode": "0x1003C",
+    "EventName": "PM_EXEC_STALL_DMISS_L2L3",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
   },
   {
     "EventCode": "0x10058",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
+    "EventCode": "0x1D05E",
+    "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of power management."
+  },
+  {
+    "EventCode": "0x1E050",
+    "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
+  },
+  {
+    "EventCode": "0x1E054",
+    "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
+  },
+  {
+    "EventCode": "0x1E056",
+    "EventName": "PM_EXEC_STALL_STORE_PIPE",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
+  },
+  {
     "EventCode": "0x1E05A",
     "EventName": "PM_CMPL_STALL_LWSYNC",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
   },
   {
+    "EventCode": "0x1F058",
+    "EventName": "PM_DISP_HELD_CYC",
+    "BriefDescription": "Cycles dispatch is held."
+  },
+  {
+    "EventCode": "0x10064",
+    "EventName": "PM_DISP_STALL_IC_L2",
+    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+  },
+  {
     "EventCode": "0x10068",
     "EventName": "PM_BR_FIN",
     "BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional."
     "BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time."
   },
   {
-    "EventCode": "0x1006C",
-    "EventName": "PM_RUN_CYC_ST_MODE",
-    "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
+    "EventCode": "0x100F8",
+    "EventName": "PM_DISP_STALL_CYC",
+    "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
   },
   {
     "EventCode": "0x20004",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet."
   },
   {
-    "EventCode": "0x2000A",
-    "EventName": "PM_HYPERVISOR_CYC",
-    "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
+    "EventCode": "0x20006",
+    "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the issue queue was full. Includes issue queue and branch queue."
   },
   {
     "EventCode": "0x2000E",
     "BriefDescription": "LSU Finished an internal operation in LD1 port."
   },
   {
+    "EventCode": "0x2C010",
+    "EventName": "PM_EXEC_STALL_LSU",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
+  },
+  {
     "EventCode": "0x2C014",
     "EventName": "PM_CMPL_STALL_SPECIAL",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing."
   },
   {
+    "EventCode": "0x2C016",
+    "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
+    "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
+  },
+  {
     "EventCode": "0x2C018",
     "EventName": "PM_EXEC_STALL_DMISS_L3MISS",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3."
   },
   {
+    "EventCode": "0x2C01C",
+    "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
+  },
+  {
+    "EventCode": "0x2C01E",
+    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
+    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
+  },
+  {
     "EventCode": "0x2D010",
     "EventName": "PM_LSU_ST1_FIN",
     "BriefDescription": "LSU Finished an internal operation in ST1 port."
   },
   {
+    "EventCode": "0x10016",
+    "EventName": "PM_VSU0_ISSUE",
+    "BriefDescription": "VSU instruction issued to VSU pipe 0."
+  },
+  {
     "EventCode": "0x2D012",
     "EventName": "PM_VSU1_ISSUE",
-    "BriefDescription": "VSU instructions issued to VSU pipe 1."
+    "BriefDescription": "VSU instruction issued to VSU pipe 1."
+  },
+  {
+    "EventCode": "0x2505C",
+    "EventName": "PM_VSU_ISSUE",
+    "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
+  },
+  {
+    "EventCode": "0x4001C",
+    "EventName": "PM_VSU_FIN",
+    "BriefDescription": "VSU instruction finished."
   },
   {
     "EventCode": "0x2D018",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)."
   },
   {
+    "EventCode": "0x2D01A",
+    "EventName": "PM_DISP_STALL_IC_MISS",
+    "BriefDescription": "Cycles when dispatch was stalled for this thread due to an instruction cache miss."
+  },
+  {
     "EventCode": "0x2D01C",
     "EventName": "PM_CMPL_STALL_STCX",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
   },
   {
-    "EventCode": "0x2E01E",
-    "EventName": "PM_EXEC_STALL_NTC_FLUSH",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch."
+    "EventCode": "0x2E018",
+    "EventName": "PM_DISP_STALL_FETCH",
+    "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
+  },
+  {
+    "EventCode": "0x2E01A",
+    "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the XVFC mapper/SRB was full."
+  },
+  {
+    "EventCode": "0x2E01C",
+    "EventName": "PM_EXEC_STALL_TLBIE",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
   },
   {
-    "EventCode": "0x2013C",
-    "EventName": "PM_MRK_FX_LSU_FIN",
-    "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
+    "EventCode": "0x2E01E",
+    "EventName": "PM_EXEC_STALL_NTC_FLUSH",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous next-to-finish (NTF) instruction is still completing and the new NTF instruction is stalled at dispatch."
   },
   {
     "EventCode": "0x2405A",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status."
   },
   {
-    "EventCode": "0x201E2",
-    "EventName": "PM_MRK_LD_MISS_L1",
-    "BriefDescription": "Marked DL1 Demand Miss counted at finish time."
+    "EventCode": "0x20066",
+    "EventName": "PM_DISP_HELD_OTHER_CYC",
+    "BriefDescription": "Cycles dispatch is held for any other reason."
+  },
+  {
+    "EventCode": "0x2006A",
+    "EventName": "PM_DISP_HELD_STF_MAPPER_CYC",
+    "BriefDescription": "Cycles dispatch is held because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
   },
   {
-    "EventCode": "0x200F4",
-    "EventName": "PM_RUN_CYC",
-    "BriefDescription": "Processor cycles gated by the run latch."
+    "EventCode": "0x30004",
+    "EventName": "PM_DISP_STALL_FLUSH",
+    "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet next-to-complete (NTC). PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
   },
   {
     "EventCode": "0x30008",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category."
   },
   {
-    "EventCode": "0x3001A",
-    "EventName": "PM_LSU_ST2_FIN",
-    "BriefDescription": "LSU Finished an internal operation in ST2 port."
+    "EventCode": "0x30014",
+    "EventName": "PM_EXEC_STALL_STORE",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
+  },
+  {
+    "EventCode": "0x30016",
+    "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it to resolve."
   },
   {
-    "EventCode": "0x30020",
-    "EventName": "PM_PMC2_REWIND",
-    "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
+    "EventCode": "0x30018",
+    "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
   },
   {
-    "EventCode": "0x30022",
-    "EventName": "PM_PMC4_SAVED",
-    "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
+    "EventCode": "0x3001A",
+    "EventName": "PM_LSU_ST2_FIN",
+    "BriefDescription": "LSU Finished an internal operation in ST2 port."
   },
   {
-    "EventCode": "0x30024",
-    "EventName": "PM_PMC6_OVERFLOW",
-    "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
+    "EventCode": "0x30026",
+    "EventName": "PM_EXEC_STALL_STORE_MISS",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
   },
   {
     "EventCode": "0x30028",
     "EventName": "PM_CMPL_STALL_MEM_ECC",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC."
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a STCX waiting for its result or a load waiting for non-critical sectors of data and ECC."
   },
   {
     "EventCode": "0x30036",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit."
   },
   {
+    "EventCode": "0x30038",
+    "EventName": "PM_EXEC_STALL_DMISS_LMEM",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCAPI cache, or local OpenCAPI memory."
+  },
+  {
     "EventCode": "0x3003A",
     "EventName": "PM_CMPL_STALL_EXCEPTION",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete."
   {
     "EventCode": "0x3F044",
     "EventName": "PM_VSU2_ISSUE",
-    "BriefDescription": "VSU instructions issued to VSU pipe 2."
+    "BriefDescription": "VSU instruction issued to VSU pipe 2."
   },
   {
     "EventCode": "0x30058",
     "EventName": "PM_TLBIE_FIN",
-    "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
+    "BriefDescription": "TLBIE instruction finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
   },
   {
-    "EventCode": "0x3D058",
-    "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE",
-    "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)."
+    "EventCode": "0x34054",
+    "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
+  },
+  {
+    "EventCode": "0x34056",
+    "EventName": "PM_EXEC_STALL_LOAD_FINISH",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the next-to-finish (NTF) instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
+  },
+  {
+    "EventCode": "0x34058",
+    "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
+    "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
+  },
+  {
+    "EventCode": "0x3D05C",
+    "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
+  },
+  {
+    "EventCode": "0x3E052",
+    "EventName": "PM_DISP_STALL_IC_L3",
+    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
+  },
+  {
+    "EventCode": "0x30060",
+    "EventName": "PM_DISP_HELD_XVFC_MAPPER_CYC",
+    "BriefDescription": "Cycles dispatch is held because the XVFC mapper/SRB was full."
   },
   {
     "EventCode": "0x30066",
     "BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline."
   },
   {
-    "EventCode": "0x40010",
-    "EventName": "PM_PMC3_OVERFLOW",
-    "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
+    "EventCode": "0x4C010",
+    "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
+    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
   },
   {
     "EventCode": "0x4C012",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it to resolve."
   },
   {
+    "EventCode": "0x4C016",
+    "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
+  },
+  {
     "EventCode": "0x4C018",
     "EventName": "PM_CMPL_STALL",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason."
   },
   {
+    "EventCode": "0x4C01A",
+    "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
+  },
+  {
     "EventCode": "0x4C01E",
     "EventName": "PM_LSU_ST3_FIN",
     "BriefDescription": "LSU Finished an internal operation in ST3 port."
   },
   {
+    "EventCode": "0x4D014",
+    "EventName": "PM_EXEC_STALL_LOAD",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
+  },
+  {
+    "EventCode": "0x4D016",
+    "EventName": "PM_EXEC_STALL_PTESYNC",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
+  },
+  {
     "EventCode": "0x4D018",
     "EventName": "PM_EXEC_STALL_BRU",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit."
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get sent to the nest."
   },
   {
+    "EventCode": "0x4D01E",
+    "EventName": "PM_DISP_STALL_BR_MPRED",
+    "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
+  },
+  {
+    "EventCode": "0x4E010",
+    "EventName": "PM_DISP_STALL_IC_L3MISS",
+    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
+  },
+  {
     "EventCode": "0x4E012",
     "EventName": "PM_EXEC_STALL_UNKNOWN",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together."
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the next-to-finish (NTF) instruction finishes and completions came too close together."
+  },
+  {
+    "EventCode": "0x4E01A",
+    "EventName": "PM_DISP_STALL_HELD_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch for any reason."
   },
   {
     "EventCode": "0x4D020",
     "BriefDescription": "VSU instruction was issued to VSU pipe 3."
   },
   {
-    "EventCode": "0x40132",
-    "EventName": "PM_MRK_LSU_FIN",
-    "BriefDescription": "LSU marked instruction finish."
+    "EventCode": "0x4003C",
+    "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+    "BriefDescription": "Cycles in which the next-to-complete (NTC) instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
   },
   {
     "EventCode": "0x45058",
     "EventName": "PM_IC_MISS_CMPL",
-    "BriefDescription": "Non-speculative icache miss, counted at completion."
+    "BriefDescription": "Non-speculative instruction cache miss, counted at completion."
   },
   {
-    "EventCode": "0x4D050",
-    "EventName": "PM_VSU_NON_FLOP_CMPL",
-    "BriefDescription": "Non-floating point VSU instructions completed."
+    "EventCode": "0x40060",
+    "EventName": "PM_DISP_HELD_SCOREBOARD_CYC",
+    "BriefDescription": "Cycles dispatch is held while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
   },
   {
-    "EventCode": "0x4D052",
-    "EventName": "PM_2FLOP_CMPL",
-    "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
+    "EventCode": "0x40062",
+    "EventName": "PM_DISP_HELD_RENAME_CYC",
+    "BriefDescription": "Cycles dispatch is held because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
   },
   {
     "EventCode": "0x400F2",
index b5d1bd3..c606ae0 100644 (file)
 [
   {
+    "EventCode": "0x100FE",
+    "EventName": "PM_INST_CMPL",
+    "BriefDescription": "PowerPC instruction completed."
+  },
+  {
+    "EventCode": "0x1000A",
+    "EventName": "PM_PMC3_REWIND",
+    "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
+  },
+  {
+    "EventCode": "0x10010",
+    "EventName": "PM_PMC4_OVERFLOW",
+    "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
+  },
+  {
+    "EventCode": "0x1001C",
+    "EventName": "PM_ULTRAVISOR_INST_CMPL",
+    "BriefDescription": "PowerPC instruction completed while the thread was in ultravisor state."
+  },
+  {
+    "EventCode": "0x100F0",
+    "EventName": "PM_CYC",
+    "BriefDescription": "Processor cycles."
+  },
+  {
+    "EventCode": "0x10020",
+    "EventName": "PM_PMC4_REWIND",
+    "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
+  },
+  {
+    "EventCode": "0x10022",
+    "EventName": "PM_PMC2_SAVED",
+    "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
+  },
+  {
+    "EventCode": "0x10024",
+    "EventName": "PM_PMC5_OVERFLOW",
+    "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
+  },
+  {
+    "EventCode": "0x1002A",
+    "EventName": "PM_PMC3_HELD_CYC",
+    "BriefDescription": "Cycles when the speculative counter for PMC3 is frozen."
+  },
+  {
+    "EventCode": "0x1F15E",
+    "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
+    "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
+  },
+  {
+    "EventCode": "0x1006C",
+    "EventName": "PM_RUN_CYC_ST_MODE",
+    "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
+  },
+  {
+    "EventCode": "0x101E8",
+    "EventName": "PM_THRESH_EXC_256",
+    "BriefDescription": "Threshold counter exceeded a count of 256."
+  },
+  {
+    "EventCode": "0x101EC",
+    "EventName": "PM_THRESH_MET",
+    "BriefDescription": "Threshold exceeded."
+  },
+  {
+    "EventCode": "0x100FA",
+    "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
+    "BriefDescription": "Cycles when at least one thread has the run latch set."
+  },
+  {
+    "EventCode": "0x2000A",
+    "EventName": "PM_HYPERVISOR_CYC",
+    "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
+  },
+  {
+    "EventCode": "0x2000C",
+    "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
+    "BriefDescription": "Cycles when the run latch is set for all threads."
+  },
+  {
+    "EventCode": "0x20010",
+    "EventName": "PM_PMC1_OVERFLOW",
+    "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
+  },
+  {
+    "EventCode": "0x2006C",
+    "EventName": "PM_RUN_CYC_SMT4_MODE",
+    "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
+  },
+  {
+    "EventCode": "0x201E6",
+    "EventName": "PM_THRESH_EXC_32",
+    "BriefDescription": "Threshold counter exceeded a value of 32."
+  },
+  {
+    "EventCode": "0x201E8",
+    "EventName": "PM_THRESH_EXC_512",
+    "BriefDescription": "Threshold counter exceeded a value of 512."
+  },
+  {
+    "EventCode": "0x200F4",
+    "EventName": "PM_RUN_CYC",
+    "BriefDescription": "Processor cycles gated by the run latch."
+  },
+  {
+    "EventCode": "0x30010",
+    "EventName": "PM_PMC2_OVERFLOW",
+    "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
+  },
+  {
+    "EventCode": "0x30020",
+    "EventName": "PM_PMC2_REWIND",
+    "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
+  },
+  {
+    "EventCode": "0x30022",
+    "EventName": "PM_PMC4_SAVED",
+    "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
+  },
+  {
+    "EventCode": "0x30024",
+    "EventName": "PM_PMC6_OVERFLOW",
+    "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
+  },
+  {
+    "EventCode": "0x3006C",
+    "EventName": "PM_RUN_CYC_SMT2_MODE",
+    "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
+  },
+  {
     "EventCode": "0x301E8",
     "EventName": "PM_THRESH_EXC_64",
     "BriefDescription": "Threshold counter exceeded a value of 64."
   },
   {
-    "EventCode": "0x45050",
-    "EventName": "PM_1FLOP_CMPL",
-    "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+    "EventCode": "0x301EA",
+    "EventName": "PM_THRESH_EXC_1024",
+    "BriefDescription": "Threshold counter exceeded a value of 1024."
+  },
+  {
+    "EventCode": "0x40010",
+    "EventName": "PM_PMC3_OVERFLOW",
+    "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
+  },
+  {
+    "EventCode": "0x40114",
+    "EventName": "PM_MRK_START_PROBE_NOP_DISP",
+    "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
+  },
+  {
+    "EventCode": "0x4D010",
+    "EventName": "PM_PMC1_SAVED",
+    "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
+  },
+  {
+    "EventCode": "0x4D012",
+    "EventName": "PM_PMC3_SAVED",
+    "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
+  },
+  {
+    "EventCode": "0x4D022",
+    "EventName": "PM_HYPERVISOR_INST_CMPL",
+    "BriefDescription": "PowerPC instruction completed while the thread was in hypervisor state."
+  },
+  {
+    "EventCode": "0x4D026",
+    "EventName": "PM_ULTRAVISOR_CYC",
+    "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
+  },
+  {
+    "EventCode": "0x4D028",
+    "EventName": "PM_PRIVILEGED_CYC",
+    "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
+  },
+  {
+    "EventCode": "0x4D02C",
+    "EventName": "PM_PMC1_REWIND",
+    "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
+  },
+  {
+    "EventCode": "0x40030",
+    "EventName": "PM_INST_FIN",
+    "BriefDescription": "Instruction finished."
+  },
+  {
+    "EventCode": "0x40134",
+    "EventName": "PM_MRK_INST_TIMEO",
+    "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
   },
   {
-    "EventCode": "0x45052",
-    "EventName": "PM_4FLOP_CMPL",
-    "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
+    "EventCode": "0x401EA",
+    "EventName": "PM_THRESH_EXC_128",
+    "BriefDescription": "Threshold counter exceeded a value of 128."
   },
   {
-    "EventCode": "0x4D054",
-    "EventName": "PM_8FLOP_CMPL",
-    "BriefDescription": "Four Double Precision vector instructions completed."
+    "EventCode": "0x400FA",
+    "EventName": "PM_RUN_INST_CMPL",
+    "BriefDescription": "PowerPC instruction completed while the run latch is set."
   }
 ]
index db3766d..ea73900 100644 (file)
@@ -1,35 +1,10 @@
 [
   {
-    "EventCode": "0x1F15E",
-    "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
-    "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
-  },
-  {
-    "EventCode": "0x20016",
-    "EventName": "PM_ST_FIN",
-    "BriefDescription": "Store finish count. Includes speculative activity."
-  },
-  {
     "EventCode": "0x20018",
     "EventName": "PM_ST_FWD",
     "BriefDescription": "Store forwards that finished."
   },
   {
-    "EventCode": "0x2011C",
-    "EventName": "PM_MRK_NTF_CYC",
-    "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)."
-  },
-  {
-    "EventCode": "0x2E01C",
-    "EventName": "PM_EXEC_STALL_TLBIE",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
-  },
-  {
-    "EventCode": "0x201E6",
-    "EventName": "PM_THRESH_EXC_32",
-    "BriefDescription": "Threshold counter exceeded a value of 32."
-  },
-  {
     "EventCode": "0x200F0",
     "EventName": "PM_ST_CMPL",
     "BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)."
   {
     "EventCode": "0x200FE",
     "EventName": "PM_DATA_FROM_L2MISS",
-    "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss."
-  },
-  {
-    "EventCode": "0x30010",
-    "EventName": "PM_PMC2_OVERFLOW",
-    "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
-  },
-  {
-    "EventCode": "0x4D010",
-    "EventName": "PM_PMC1_SAVED",
-    "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
+    "BriefDescription": "The processor's L1 data cache was reloaded from a source beyond the local core's L2 due to a demand miss."
   },
   {
-    "EventCode": "0x4D05C",
-    "EventName": "PM_DPP_FLOP_CMPL",
-    "BriefDescription": "Double-Precision or Quad-Precision instructions completed."
+    "EventCode": "0x300F0",
+    "EventName": "PM_ST_MISS_L1",
+    "BriefDescription": "Store Missed L1."
   }
 ]
index daf9458..c6780d5 100644 (file)
     },
     {
         "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
         "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc",
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu_core@FP_ARITH_DISPATCHED.PORT_0@ + cpu_core@FP_ARITH_DISPATCHED.PORT_1@ + cpu_core@FP_ARITH_DISPATCHED.PORT_5@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire",
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 0f1628d..06e67e3 100644 (file)
     },
     {
         "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
         "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 8fcc05c..a6eed0d 100644 (file)
@@ -85,6 +85,7 @@
     },
     {
         "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_4k_aliasing",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 9bb7e3f..7082ad5 100644 (file)
     },
     {
         "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_4k_aliasing",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 6650100..3a8770e 100644 (file)
@@ -19,12 +19,12 @@ GenuineIntel-6-3A,v24,ivybridge,core
 GenuineIntel-6-3E,v23,ivytown,core
 GenuineIntel-6-2D,v23,jaketown,core
 GenuineIntel-6-(57|85),v10,knightslanding,core
-GenuineIntel-6-A[AC],v1.03,meteorlake,core
+GenuineIntel-6-A[AC],v1.04,meteorlake,core
 GenuineIntel-6-1[AEF],v3,nehalemep,core
 GenuineIntel-6-2E,v3,nehalemex,core
 GenuineIntel-6-A7,v1.01,rocketlake,core
 GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-(8F|CF),v1.14,sapphirerapids,core
+GenuineIntel-6-(8F|CF),v1.15,sapphirerapids,core
 GenuineIntel-6-AF,v1.00,sierraforest,core
 GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
 GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
index e1ae7c9..1de0200 100644 (file)
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.L2_STALLS",
+        "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Number of L1D misses that are outstanding",
         "EventCode": "0x48",
         "EventName": "L1D_PEND_MISS.PENDING",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles when L1D is locked",
+        "EventCode": "0x42",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
         "EventCode": "0x2e",
         "EventName": "LONGEST_LAT_CACHE.MISS",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
         "Data_LA": "1",
         "EventCode": "0xd2",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
+        "EventCode": "0x44",
+        "EventName": "MEM_STORE_RETIRED.L2_HIT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of load ops retired.",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cacheable and Non-Cacheable code read requests",
+        "EventCode": "0x21",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Demand Data Read requests sent to uncore",
         "EventCode": "0x21",
         "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles where at least 1 outstanding demand data read request is pending.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterMask": "6",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
         "EventCode": "0x2c",
         "EventName": "SQ_MISC.BUS_LOCK",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+        "EventCode": "0x40",
+        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
+        "EventCode": "0x40",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+        "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+        "EventCode": "0x40",
+        "EventName": "SW_PREFETCH_ACCESS.T0",
+        "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "EventCode": "0x40",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+        "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss",
         "EventCode": "0x71",
         "EventName": "TOPDOWN_FE_BOUND.ICACHE",
index 616489f..f66506e 100644 (file)
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.PORT_5",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
index 0f06451..8264419 100644 (file)
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "DSB_FILL.FB_STALL_OT",
+        "EventCode": "0x62",
+        "EventName": "DSB_FILL.FB_STALL_OT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Retired ANT branches",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.ANY_ANT",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired Instructions that experienced a DSB miss.",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x1",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x3",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired Instructions that experienced a critical DSB miss.",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x11",
+        "PEBS": "1",
+        "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x3",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.ITLB_MISS",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x13",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced Instruction L2 Cache true miss.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x3",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x15",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x3",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
index 67e949b..2605e1d 100644 (file)
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "Counts the number of Machine Clears detected due to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
         "CounterMask": "3",
         "EventCode": "0x47",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "MEMORY_ORDERING.MD_NUKE",
+        "EventCode": "0x09",
+        "EventName": "MEMORY_ORDERING.MD_NUKE",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of memory ordering machine clears due to memory renaming.",
+        "EventCode": "0x09",
+        "EventName": "MEMORY_ORDERING.MRN_NUKE",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x400",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "53",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles.",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x800",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "23",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "SampleAfterValue": "100003",
         "UMask": "0x10",
         "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles where data return is pending for a Demand Data Read request who miss L3 cache.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.  Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles where the core is waiting on at least 6 outstanding demand data read requests known to have missed the L3 cache.",
+        "CounterMask": "6",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
+        "PublicDescription": "Cycles where the core is waiting on at least 6 outstanding demand data read requests known to have missed the L3 cache.  Note that this event does not capture all elapsed cycles while the requests are outstanding - only cycles from when the requests were known to have missed the L3 cache.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
     }
 ]
index 2ec57f4..f4c6035 100644 (file)
@@ -1,5 +1,13 @@
 [
     {
+        "BriefDescription": "ASSISTS.PAGE_FAULT",
+        "EventCode": "0xc1",
+        "EventName": "ASSISTS.PAGE_FAULT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts streaming stores that have any type of response.",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "RS.EMPTY_RESOURCE",
+        "EventCode": "0xa5",
+        "EventName": "RS.EMPTY_RESOURCE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)",
         "EventCode": "0x75",
         "EventName": "SERIALIZATION.C01_MS_SCB",
index eeaa7a9..352c5ef 100644 (file)
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.RET",
+        "PEBS": "1",
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.RETURN",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.C01",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.  This state can be entered via the TPAUSE or UMWAIT instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.C02",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.  This state can be entered via the TPAUSE or UMWAIT instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.C0_WAIT",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x70",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles",
         "EventName": "CPU_CLK_UNHALTED.CORE",
         "SampleAfterValue": "2000003",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "CPU_CLK_UNHALTED.PAUSE",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.PAUSE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
         "EventCode": "0x3c",
         "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired NOP instructions.",
+        "EventCode": "0xc0",
+        "EventName": "INST_RETIRED.NOP",
+        "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Precise instruction retired with PEBS precise-distribution",
         "EventName": "INST_RETIRED.PREC_DIST",
         "PEBS": "1",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Iterations of Repeat string retired instructions.",
+        "EventCode": "0xc0",
+        "EventName": "INST_RETIRED.REP_ITERATION",
+        "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
         "CounterMask": "1",
         "EventCode": "0xad",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Clears speculative count",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xad",
+        "EventName": "INT_MISC.CLEARS_COUNT",
+        "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
+        "SampleAfterValue": "500009",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
         "EventCode": "0xad",
         "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+        "EventCode": "0xad",
+        "EventName": "INT_MISC.RAT_STALLS",
+        "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
         "EventCode": "0xad",
         "EventName": "INT_MISC.RECOVERY_CYCLES",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+        "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
         "EventCode": "0x03",
         "EventName": "LD_BLOCKS.DATA_UNKNOWN",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x88",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.",
         "EventCode": "0x03",
         "EventName": "LD_BLOCKS.STORE_FORWARD",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x82",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
         "CounterMask": "1",
         "EventCode": "0xa8",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "LFENCE instructions retired",
+        "EventCode": "0xe0",
+        "EventName": "MISC2_RETIRED.LFENCE",
+        "PublicDescription": "number of LFENCE retired instructions",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
         "EventCode": "0xa2",
         "EventName": "RESOURCE_STALLS.SCOREBOARD",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles with retired uop(s).",
+        "CounterMask": "1",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.CYCLES",
+        "PublicDescription": "Counts cycles where at least one uop has retired.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Retired uops except the last uop of each instruction.",
         "EventCode": "0xc2",
         "EventName": "UOPS_RETIRED.HEAVY",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.STALLS",
+        "Invert": "1",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Cycles with less than 10 actually retired uops.",
         "CounterMask": "10",
         "EventCode": "0xc2",
index 1bb9ced..a0191c8 100644 (file)
@@ -85,6 +85,7 @@
     },
     {
         "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_4k_aliasing",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 31b6be9..442ef38 100644 (file)
         "UMask": "0x1"
     },
     {
+        "BriefDescription": "Counts demand data reads that were supplied by PMM attached to this socket, whether or not in Sub NUMA Cluster(SNC) Mode.  In SNC Mode counts PMM accesses that are controlled by the close or distant SNC Cluster.",
+        "EventCode": "0x2A,0x2B",
+        "EventName": "OCR.DEMAND_DATA_RD.LOCAL_SOCKET_PMM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x700C00001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by PMM.",
+        "EventCode": "0x2A,0x2B",
+        "EventName": "OCR.DEMAND_DATA_RD.PMM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x703C00001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads that were supplied by DRAM attached to another socket.",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_DATA_RD.REMOTE_DRAM",
index c207c85..222212a 100644 (file)
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector + tma_fp_amx",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR_HALF + 2 * (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF) + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * (FP_ARITH_INST_RETIRED2.128B_PACKED_HALF + cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@) + 16 * (FP_ARITH_INST_RETIRED2.256B_PACKED_HALF + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) + 32 * FP_ARITH_INST_RETIRED2.512B_PACKED_HALF + 4 * AMX_OPS_RETIRED.BF16",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 94cb385..2795a40 100644 (file)
     },
     {
         "BriefDescription": "Average number of parallel data read requests to external memory",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.DATA_READ / UNC_ARB_TRK_OCCUPANCY.DATA_READ@thresh\\=1@",
+        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.DATA_READ / UNC_ARB_TRK_OCCUPANCY.DATA_READ@cmask\\=1@",
         "MetricGroup": "Mem;MemoryBW;SoC",
         "MetricName": "tma_info_system_mem_parallel_reads",
         "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
index c7c2d6a..fab084e 100644 (file)
@@ -79,6 +79,7 @@
     },
     {
         "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_4k_aliasing",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index a630c61..12bd043 100644 (file)
@@ -266,19 +266,53 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = {
        },
 };
 
-int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
-                                   void *data)
+int pmu_events_table__for_each_event(const struct pmu_events_table *table, struct perf_pmu *pmu,
+                                    pmu_event_iter_fn fn, void *data)
 {
        for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
-               int ret = fn(pe, table, data);
+               int ret;
 
+                if (pmu && !pmu__name_match(pmu, pe->pmu))
+                        continue;
+
+               ret = fn(pe, table, data);
                if (ret)
                        return ret;
        }
        return 0;
 }
 
-int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
+int pmu_events_table__find_event(const struct pmu_events_table *table,
+                                 struct perf_pmu *pmu,
+                                 const char *name,
+                                 pmu_event_iter_fn fn,
+                                 void *data)
+{
+       for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
+                if (pmu && !pmu__name_match(pmu, pe->pmu))
+                        continue;
+
+               if (!strcasecmp(pe->name, name))
+                       return fn(pe, table, data);
+       }
+        return -1000;
+}
+
+size_t pmu_events_table__num_events(const struct pmu_events_table *table,
+                                    struct perf_pmu *pmu)
+{
+        size_t count = 0;
+
+       for (const struct pmu_event *pe = &table->entries[0]; pe->name; pe++) {
+                if (pmu && !pmu__name_match(pmu, pe->pmu))
+                        continue;
+
+               count++;
+       }
+        return count;
+}
+
+int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
                                      void *data)
 {
        for (const struct pmu_metric *pm = &table->entries[0]; pm->metric_expr; pm++) {
@@ -371,7 +405,8 @@ const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const
 int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
 {
        for (const struct pmu_events_map *tables = &pmu_events_map[0]; tables->arch; tables++) {
-               int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
+               int ret = pmu_events_table__for_each_event(&tables->event_table,
+                                                          /*pmu=*/ NULL, fn, data);
 
                if (ret)
                        return ret;
@@ -384,7 +419,7 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
        for (const struct pmu_events_map *tables = &pmu_events_map[0];
             tables->arch;
             tables++) {
-               int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
+               int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
 
                if (ret)
                        return ret;
@@ -408,7 +443,7 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
             tables->name;
             tables++) {
-               int ret = pmu_events_table_for_each_event(&tables->table, fn, data);
+               int ret = pmu_events_table__for_each_event(&tables->table, /*pmu=*/ NULL, fn, data);
 
                if (ret)
                        return ret;
index 12e80bb..a7e8833 100755 (executable)
@@ -42,7 +42,7 @@ _metricgroups = {}
 # Order specific JsonEvent attributes will be visited.
 _json_event_attributes = [
     # cmp_sevent related attributes.
-    'name', 'pmu', 'topic', 'desc',
+    'name', 'topic', 'desc',
     # Seems useful, put it early.
     'event',
     # Short things in alphabetical order.
@@ -53,7 +53,7 @@ _json_event_attributes = [
 
 # Attributes that are in pmu_metric rather than pmu_event.
 _json_metric_attributes = [
-    'pmu', 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
+    'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
     'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
     'default_metricgroup_name', 'aggr_mode', 'event_grouping'
 ]
@@ -113,13 +113,24 @@ class BigCString:
   strings: Set[str]
   big_string: Sequence[str]
   offsets: Dict[str, int]
+  insert_number: int
+  insert_point: Dict[str, int]
+  metrics: Set[str]
 
   def __init__(self):
     self.strings = set()
+    self.insert_number = 0;
+    self.insert_point = {}
+    self.metrics = set()
 
-  def add(self, s: str) -> None:
+  def add(self, s: str, metric: bool) -> None:
     """Called to add to the big string."""
-    self.strings.add(s)
+    if s not in self.strings:
+      self.strings.add(s)
+      self.insert_point[s] = self.insert_number
+      self.insert_number += 1
+      if metric:
+        self.metrics.add(s)
 
   def compute(self) -> None:
     """Called once all strings are added to compute the string and offsets."""
@@ -160,8 +171,11 @@ class BigCString:
     self.big_string = []
     self.offsets = {}
 
+    def string_cmp_key(s: str) -> Tuple[bool, int, str]:
+      return (s in self.metrics, self.insert_point[s], s)
+
     # Emit all strings that aren't folded in a sorted manner.
-    for s in sorted(self.strings):
+    for s in sorted(self.strings, key=string_cmp_key):
       if s not in folded_strings:
         self.offsets[s] = big_string_offset
         self.big_string.append(f'/* offset={big_string_offset} */ "')
@@ -252,7 +266,7 @@ class JsonEvent:
     def unit_to_pmu(unit: str) -> Optional[str]:
       """Convert a JSON Unit to Linux PMU name."""
       if not unit:
-        return None
+        return 'default_core'
       # Comment brought over from jevents.c:
       # it's not realistic to keep adding these, we need something more scalable ...
       table = {
@@ -274,6 +288,7 @@ class JsonEvent:
           'DFPMC': 'amd_df',
           'cpu_core': 'cpu_core',
           'cpu_atom': 'cpu_atom',
+          'ali_drw': 'ali_drw',
       }
       return table[unit] if unit in table else f'uncore_{unit.lower()}'
 
@@ -342,16 +357,15 @@ class JsonEvent:
       self.desc += extra_desc
     if self.long_desc and extra_desc:
       self.long_desc += extra_desc
-    if self.pmu:
-      if self.desc and not self.desc.endswith('. '):
-        self.desc += '. '
-      self.desc = (self.desc if self.desc else '') + ('Unit: ' + self.pmu + ' ')
-    if arch_std and arch_std.lower() in _arch_std_events:
-      event = _arch_std_events[arch_std.lower()].event
-      # Copy from the architecture standard event to self for undefined fields.
-      for attr, value in _arch_std_events[arch_std.lower()].__dict__.items():
-        if hasattr(self, attr) and not getattr(self, attr):
-          setattr(self, attr, value)
+    if arch_std:
+      if arch_std.lower() in _arch_std_events:
+        event = _arch_std_events[arch_std.lower()].event
+        # Copy from the architecture standard event to self for undefined fields.
+        for attr, value in _arch_std_events[arch_std.lower()].__dict__.items():
+          if hasattr(self, attr) and not getattr(self, attr):
+            setattr(self, attr, value)
+      else:
+        raise argparse.ArgumentTypeError('Cannot find arch std event:', arch_std)
 
     self.event = real_event(self.name, event)
 
@@ -433,13 +447,13 @@ def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
 def print_pending_events() -> None:
   """Optionally close events table."""
 
-  def event_cmp_key(j: JsonEvent) -> Tuple[bool, str, str, str, str]:
+  def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]:
     def fix_none(s: Optional[str]) -> str:
       if s is None:
         return ''
       return s
 
-    return (j.desc is not None, fix_none(j.topic), fix_none(j.name), fix_none(j.pmu),
+    return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None, fix_none(j.topic),
             fix_none(j.metric_name))
 
   global _pending_events
@@ -454,13 +468,36 @@ def print_pending_events() -> None:
     global event_tables
     _event_tables.append(_pending_events_tblname)
 
-  _args.output_file.write(
-      f'static const struct compact_pmu_event {_pending_events_tblname}[] = {{\n')
-
+  first = True
+  last_pmu = None
+  pmus = set()
   for event in sorted(_pending_events, key=event_cmp_key):
+    if event.pmu != last_pmu:
+      if not first:
+        _args.output_file.write('};\n')
+      pmu_name = event.pmu.replace(',', '_')
+      _args.output_file.write(
+          f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n')
+      first = False
+      last_pmu = event.pmu
+      pmus.add((event.pmu, pmu_name))
+
     _args.output_file.write(event.to_c_string(metric=False))
   _pending_events = []
 
+  _args.output_file.write(f"""
+}};
+
+const struct pmu_table_entry {_pending_events_tblname}[] = {{
+""")
+  for (pmu, tbl_pmu) in sorted(pmus):
+    pmu_name = f"{pmu}\\000"
+    _args.output_file.write(f"""{{
+     .entries = {_pending_events_tblname}_{tbl_pmu},
+     .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}),
+     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
+}},
+""")
   _args.output_file.write('};\n\n')
 
 def print_pending_metrics() -> None:
@@ -486,13 +523,36 @@ def print_pending_metrics() -> None:
     global metric_tables
     _metric_tables.append(_pending_metrics_tblname)
 
-  _args.output_file.write(
-      f'static const struct compact_pmu_event {_pending_metrics_tblname}[] = {{\n')
-
+  first = True
+  last_pmu = None
+  pmus = set()
   for metric in sorted(_pending_metrics, key=metric_cmp_key):
+    if metric.pmu != last_pmu:
+      if not first:
+        _args.output_file.write('};\n')
+      pmu_name = metric.pmu.replace(',', '_')
+      _args.output_file.write(
+          f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n')
+      first = False
+      last_pmu = metric.pmu
+      pmus.add((metric.pmu, pmu_name))
+
     _args.output_file.write(metric.to_c_string(metric=True))
   _pending_metrics = []
 
+  _args.output_file.write(f"""
+}};
+
+const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
+""")
+  for (pmu, tbl_pmu) in sorted(pmus):
+    pmu_name = f"{pmu}\\000"
+    _args.output_file.write(f"""{{
+     .entries = {_pending_metrics_tblname}_{tbl_pmu},
+     .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}),
+     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
+}},
+""")
   _args.output_file.write('};\n\n')
 
 def get_topic(topic: str) -> str:
@@ -521,17 +581,20 @@ def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
       assert len(mgroup) > 1, parents
       description = f"{metricgroup_descriptions[mgroup]}\\000"
       mgroup = f"{mgroup}\\000"
-      _bcs.add(mgroup)
-      _bcs.add(description)
+      _bcs.add(mgroup, metric=True)
+      _bcs.add(description, metric=True)
       _metricgroups[mgroup] = description
     return
 
   topic = get_topic(item.name)
   for event in read_json_events(item.path, topic):
+    pmu_name = f"{event.pmu}\\000"
     if event.name:
-      _bcs.add(event.build_c_string(metric=False))
+      _bcs.add(pmu_name, metric=False)
+      _bcs.add(event.build_c_string(metric=False), metric=False)
     if event.metric_name:
-      _bcs.add(event.build_c_string(metric=True))
+      _bcs.add(pmu_name, metric=True)
+      _bcs.add(event.build_c_string(metric=True), metric=True)
 
 def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
   """Process a JSON file during the main walk."""
@@ -573,14 +636,14 @@ def print_mapping_table(archs: Sequence[str]) -> None:
   _args.output_file.write("""
 /* Struct used to make the PMU event table implementation opaque to callers. */
 struct pmu_events_table {
-        const struct compact_pmu_event *entries;
-        size_t length;
+        const struct pmu_table_entry *pmus;
+        uint32_t num_pmus;
 };
 
 /* Struct used to make the PMU metric table implementation opaque to callers. */
 struct pmu_metrics_table {
-        const struct compact_pmu_event *entries;
-        size_t length;
+        const struct pmu_table_entry *pmus;
+        uint32_t num_pmus;
 };
 
 /*
@@ -610,12 +673,12 @@ const struct pmu_events_map pmu_events_map[] = {
 \t.arch = "testarch",
 \t.cpuid = "testcpu",
 \t.event_table = {
-\t\t.entries = pmu_events__test_soc_cpu,
-\t\t.length = ARRAY_SIZE(pmu_events__test_soc_cpu),
+\t\t.pmus = pmu_events__test_soc_cpu,
+\t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
 \t},
 \t.metric_table = {
-\t\t.entries = pmu_metrics__test_soc_cpu,
-\t\t.length = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
+\t\t.pmus = pmu_metrics__test_soc_cpu,
+\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
 \t}
 },
 """)
@@ -645,12 +708,12 @@ const struct pmu_events_map pmu_events_map[] = {
 \t.arch = "{arch}",
 \t.cpuid = "{cpuid}",
 \t.event_table = {{
-\t\t.entries = {event_tblname},
-\t\t.length = {event_size}
+\t\t.pmus = {event_tblname},
+\t\t.num_pmus = {event_size}
 \t}},
 \t.metric_table = {{
-\t\t.entries = {metric_tblname},
-\t\t.length = {metric_size}
+\t\t.pmus = {metric_tblname},
+\t\t.num_pmus = {metric_size}
 \t}}
 }},
 """)
@@ -681,15 +744,15 @@ static const struct pmu_sys_events pmu_sys_event_tables[] = {
   for tblname in _sys_event_tables:
     _args.output_file.write(f"""\t{{
 \t\t.event_table = {{
-\t\t\t.entries = {tblname},
-\t\t\t.length = ARRAY_SIZE({tblname})
+\t\t\t.pmus = {tblname},
+\t\t\t.num_pmus = ARRAY_SIZE({tblname})
 \t\t}},""")
     metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
     if metric_tblname in _sys_metric_tables:
       _args.output_file.write(f"""
 \t\t.metric_table = {{
-\t\t\t.entries = {metric_tblname},
-\t\t\t.length = ARRAY_SIZE({metric_tblname})
+\t\t\t.pmus = {metric_tblname},
+\t\t\t.num_pmus = ARRAY_SIZE({metric_tblname})
 \t\t}},""")
       printed_metric_tables.append(metric_tblname)
     _args.output_file.write(f"""
@@ -749,15 +812,18 @@ static void decompress_metric(int offset, struct pmu_metric *pm)
       _args.output_file.write('\twhile (*p++);')
   _args.output_file.write("""}
 
-int pmu_events_table_for_each_event(const struct pmu_events_table *table,
-                                    pmu_event_iter_fn fn,
-                                    void *data)
+static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
+                                                const struct pmu_table_entry *pmu,
+                                                pmu_event_iter_fn fn,
+                                                void *data)
 {
-        for (size_t i = 0; i < table->length; i++) {
-                struct pmu_event pe;
-                int ret;
+        int ret;
+        struct pmu_event pe = {
+                .pmu = &big_c_string[pmu->pmu_name.offset],
+        };
 
-                decompress_event(table->entries[i].offset, &pe);
+        for (uint32_t i = 0; i < pmu->num_entries; i++) {
+                decompress_event(pmu->entries[i].offset, &pe);
                 if (!pe.name)
                         continue;
                 ret = fn(&pe, table, data);
@@ -765,17 +831,119 @@ int pmu_events_table_for_each_event(const struct pmu_events_table *table,
                         return ret;
         }
         return 0;
+ }
+
+static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table,
+                                            const struct pmu_table_entry *pmu,
+                                            const char *name,
+                                            pmu_event_iter_fn fn,
+                                            void *data)
+{
+        struct pmu_event pe = {
+                .pmu = &big_c_string[pmu->pmu_name.offset],
+        };
+        int low = 0, high = pmu->num_entries - 1;
+
+        while (low <= high) {
+                int cmp, mid = (low + high) / 2;
+
+                decompress_event(pmu->entries[mid].offset, &pe);
+
+                if (!pe.name && !name)
+                        goto do_call;
+
+                if (!pe.name && name) {
+                        low = mid + 1;
+                        continue;
+                }
+                if (pe.name && !name) {
+                        high = mid - 1;
+                        continue;
+                }
+
+                cmp = strcasecmp(pe.name, name);
+                if (cmp < 0) {
+                        low = mid + 1;
+                        continue;
+                }
+                if (cmp > 0) {
+                        high = mid - 1;
+                        continue;
+                }
+  do_call:
+                return fn ? fn(&pe, table, data) : 0;
+        }
+        return -1000;
 }
 
-int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table,
-                                     pmu_metric_iter_fn fn,
-                                     void *data)
+int pmu_events_table__for_each_event(const struct pmu_events_table *table,
+                                    struct perf_pmu *pmu,
+                                    pmu_event_iter_fn fn,
+                                    void *data)
+{
+        for (size_t i = 0; i < table->num_pmus; i++) {
+                const struct pmu_table_entry *table_pmu = &table->pmus[i];
+                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+                int ret;
+
+                if (pmu && !pmu__name_match(pmu, pmu_name))
+                        continue;
+
+                ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data);
+                if (pmu || ret)
+                        return ret;
+        }
+        return 0;
+}
+
+int pmu_events_table__find_event(const struct pmu_events_table *table,
+                                 struct perf_pmu *pmu,
+                                 const char *name,
+                                 pmu_event_iter_fn fn,
+                                 void *data)
 {
-        for (size_t i = 0; i < table->length; i++) {
-                struct pmu_metric pm;
+        for (size_t i = 0; i < table->num_pmus; i++) {
+                const struct pmu_table_entry *table_pmu = &table->pmus[i];
+                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
                 int ret;
 
-                decompress_metric(table->entries[i].offset, &pm);
+                if (!pmu__name_match(pmu, pmu_name))
+                        continue;
+
+                ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
+                if (ret != -1000)
+                        return ret;
+        }
+        return -1000;
+}
+
+size_t pmu_events_table__num_events(const struct pmu_events_table *table,
+                                    struct perf_pmu *pmu)
+{
+        size_t count = 0;
+
+        for (size_t i = 0; i < table->num_pmus; i++) {
+                const struct pmu_table_entry *table_pmu = &table->pmus[i];
+                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+
+                if (pmu__name_match(pmu, pmu_name))
+                        count += table_pmu->num_entries;
+        }
+        return count;
+}
+
+static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
+                                                const struct pmu_table_entry *pmu,
+                                                pmu_metric_iter_fn fn,
+                                                void *data)
+{
+        int ret;
+        struct pmu_metric pm = {
+                .pmu = &big_c_string[pmu->pmu_name.offset],
+        };
+
+        for (uint32_t i = 0; i < pmu->num_entries; i++) {
+                decompress_metric(pmu->entries[i].offset, &pm);
                 if (!pm.metric_expr)
                         continue;
                 ret = fn(&pm, table, data);
@@ -785,11 +953,25 @@ int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table,
         return 0;
 }
 
+int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
+                                     pmu_metric_iter_fn fn,
+                                     void *data)
+{
+        for (size_t i = 0; i < table->num_pmus; i++) {
+                int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
+                                                                 fn, data);
+
+                if (ret)
+                        return ret;
+        }
+        return 0;
+}
+
 const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
 {
         const struct pmu_events_table *table = NULL;
         char *cpuid = perf_pmu__getcpuid(pmu);
-        int i;
+        size_t i;
 
         /* on some platforms which uses cpus map, cpuid can be NULL for
          * PMUs other than CORE PMUs.
@@ -809,7 +991,17 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
                 }
         }
         free(cpuid);
-        return table;
+        if (!pmu)
+                return table;
+
+        for (i = 0; i < table->num_pmus; i++) {
+                const struct pmu_table_entry *table_pmu = &table->pmus[i];
+                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
+
+                if (pmu__name_match(pmu, pmu_name))
+                        return table;
+        }
+        return NULL;
 }
 
 const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu)
@@ -866,7 +1058,8 @@ int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
         for (const struct pmu_events_map *tables = &pmu_events_map[0];
              tables->arch;
              tables++) {
-                int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
+                int ret = pmu_events_table__for_each_event(&tables->event_table,
+                                                           /*pmu=*/ NULL, fn, data);
 
                 if (ret)
                         return ret;
@@ -879,7 +1072,7 @@ int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
         for (const struct pmu_events_map *tables = &pmu_events_map[0];
              tables->arch;
              tables++) {
-                int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
+                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
 
                 if (ret)
                         return ret;
@@ -903,7 +1096,8 @@ int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
         for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
              tables->name;
              tables++) {
-                int ret = pmu_events_table_for_each_event(&tables->event_table, fn, data);
+                int ret = pmu_events_table__for_each_event(&tables->event_table,
+                                                           /*pmu=*/ NULL, fn, data);
 
                 if (ret)
                         return ret;
@@ -916,7 +1110,7 @@ int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
         for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
              tables->name;
              tables++) {
-                int ret = pmu_metrics_table_for_each_metric(&tables->metric_table, fn, data);
+                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);
 
                 if (ret)
                         return ret;
@@ -999,14 +1193,20 @@ such as "arm/cortex-a34".''',
   _args = ap.parse_args()
 
   _args.output_file.write("""
-#include "pmu-events/pmu-events.h"
+#include <pmu-events/pmu-events.h>
 #include "util/header.h"
 #include "util/pmu.h"
 #include <string.h>
 #include <stddef.h>
 
 struct compact_pmu_event {
-  int offset;
+        int offset;
+};
+
+struct pmu_table_entry {
+        const struct compact_pmu_event *entries;
+        uint32_t num_entries;
+        struct compact_pmu_event pmu_name;
 };
 
 """)
index 85a3545..0e9ec65 100644 (file)
@@ -413,6 +413,10 @@ def has_event(event: Event) -> Function:
   # pylint: disable=invalid-name
   return Function('has_event', event)
 
+def strcmp_cpuid_str(event: str) -> Function:
+  # pylint: disable=redefined-builtin
+  # pylint: disable=invalid-name
+  return Function('strcmp_cpuid_str', event)
 
 class Metric:
   """An individual metric that will specifiable on the perf command line."""
@@ -541,14 +545,23 @@ def ParsePerfJson(orig: str) -> Expression:
   """
   # pylint: disable=eval-used
   py = orig.strip()
+  # First try to convert everything that looks like a string (event name) into Event(r"EVENT_NAME").
+  # This isn't very selective so is followed up by converting some unwanted conversions back again
   py = re.sub(r'([a-zA-Z][^-+/\* \\\(\),]*(?:\\.[^-+/\* \\\(\),]*)*)',
               r'Event(r"\1")', py)
+  # If it started with a # it should have been a literal, rather than an event name
   py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py)
+  # Convert accidentally converted hex constants ("0Event(r"xDEADBEEF)"") back to a constant,
+  # but keep it wrapped in Event(), otherwise Python drops the 0x prefix and it gets interpreted as
+  # a double by the Bison parser
+  py = re.sub(r'0Event\(r"[xX]([0-9a-fA-F]*)"\)', r'Event("0x\1")', py)
+  # Convert accidentally converted scientific notation constants back
   py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py)
-  keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count', 'has_event']
+  # Convert all the known keywords back from events to just the keyword
+  keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count', 'has_event', 'strcmp_cpuid_str',
+              'cpuid_not_more_than']
   for kw in keywords:
     py = re.sub(rf'Event\(r"{kw}"\)', kw, py)
-
   try:
     parsed = ast.parse(py, mode='eval')
   except SyntaxError as e:
index caf59f2..f5aa96f 100644 (file)
@@ -3,6 +3,7 @@
 #define PMU_EVENTS_H
 
 #include <stdbool.h>
+#include <stddef.h>
 
 struct perf_pmu;
 
@@ -77,9 +78,19 @@ typedef int (*pmu_metric_iter_fn)(const struct pmu_metric *pm,
                                  const struct pmu_metrics_table *table,
                                  void *data);
 
-int pmu_events_table_for_each_event(const struct pmu_events_table *table, pmu_event_iter_fn fn,
+int pmu_events_table__for_each_event(const struct pmu_events_table *table,
+                                   struct perf_pmu *pmu,
+                                   pmu_event_iter_fn fn,
                                    void *data);
-int pmu_metrics_table_for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
+int pmu_events_table__find_event(const struct pmu_events_table *table,
+                                 struct perf_pmu *pmu,
+                                 const char *name,
+                                 pmu_event_iter_fn fn,
+                                void *data);
+size_t pmu_events_table__num_events(const struct pmu_events_table *table,
+                                   struct perf_pmu *pmu);
+
+int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
                                     void *data);
 
 const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu);
index 7d0e33c..5b0b5ff 100644 (file)
@@ -1,3 +1,4 @@
 perf-y += Context.o
 
-CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
+# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance)
+CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-declaration-after-statement
index 7384dcb..b75d318 100644 (file)
@@ -54,6 +54,7 @@ try:
        import audit
        machine_to_id = {
                'x86_64': audit.MACH_86_64,
+               'aarch64': audit.MACH_AARCH64,
                'alpha' : audit.MACH_ALPHA,
                'ia64'  : audit.MACH_IA64,
                'ppc'   : audit.MACH_PPC,
@@ -73,9 +74,9 @@ try:
 except:
        if not audit_package_warned:
                audit_package_warned = True
-               print("Install the audit-libs-python package to get syscall names.\n"
-                    "For example:\n  # apt-get install python-audit (Ubuntu)"
-                    "\n  # yum install audit-libs-python (Fedora)"
+               print("Install the python-audit package to get syscall names.\n"
+                    "For example:\n  # apt-get install python3-audit (Ubuntu)"
+                    "\n  # yum install python3-audit (Fedora)"
                     "\n  etc.\n")
 
 def syscall_name(id):
diff --git a/tools/perf/scripts/python/bin/gecko-record b/tools/perf/scripts/python/bin/gecko-record
new file mode 100644 (file)
index 0000000..f0d1aa5
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -F 99 -g "$@"
diff --git a/tools/perf/scripts/python/bin/gecko-report b/tools/perf/scripts/python/bin/gecko-report
new file mode 100755 (executable)
index 0000000..1867ec8
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/bash
+# description: create firefox gecko profile json format from perf.data
+if [ "$*" = "-i -" ]; then
+perf script -s "$PERF_EXEC_PATH"/scripts/python/gecko.py
+else
+perf script -s "$PERF_EXEC_PATH"/scripts/python/gecko.py -- "$@"
+fi
diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py
new file mode 100644 (file)
index 0000000..bc5a72f
--- /dev/null
@@ -0,0 +1,395 @@
+# gecko.py - Convert perf record output to Firefox's gecko profile format
+# SPDX-License-Identifier: GPL-2.0
+#
+# The script converts perf.data to Gecko Profile Format,
+# which can be read by https://profiler.firefox.com/.
+#
+# Usage:
+#
+#     perf record -a -g -F 99 sleep 60
+#     perf script report gecko
+#
+# Combined:
+#
+#     perf script gecko -F 99 -a sleep 60
+
+import os
+import sys
+import time
+import json
+import string
+import random
+import argparse
+import threading
+import webbrowser
+import urllib.parse
+from os import system
+from functools import reduce
+from dataclasses import dataclass, field
+from http.server import HTTPServer, SimpleHTTPRequestHandler, test
+from typing import List, Dict, Optional, NamedTuple, Set, Tuple, Any
+
+# Add the Perf-Trace-Util library to the Python path
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+StringID = int
+StackID = int
+FrameID = int
+CategoryID = int
+Milliseconds = float
+
+# start_time is initialized only once for all event traces.
+start_time = None
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
+# Follow Brendan Gregg's Flamegraph convention: orange for kernel and yellow for user space by default.
+CATEGORIES = None
+
+# The product name is used by the profiler UI to show the Operating system and Processor.
+PRODUCT = os.popen('uname -op').read().strip()
+
+# store the output file
+output_file = None
+
+# Here key = tid, value = Thread
+tid_to_thread = dict()
+
+# The HTTP server is used to serve the profile to the profiler UI.
+http_server_thread = None
+
+# The category index is used by the profiler UI to show the color of the flame graph.
+USER_CATEGORY_INDEX = 0
+KERNEL_CATEGORY_INDEX = 1
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
+class Frame(NamedTuple):
+       string_id: StringID
+       relevantForJS: bool
+       innerWindowID: int
+       implementation: None
+       optimizations: None
+       line: None
+       column: None
+       category: CategoryID
+       subcategory: int
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
+class Stack(NamedTuple):
+       prefix_id: Optional[StackID]
+       frame_id: FrameID
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
+class Sample(NamedTuple):
+       stack_id: Optional[StackID]
+       time_ms: Milliseconds
+       responsiveness: int
+
+@dataclass
+class Thread:
+       """A builder for a profile of the thread.
+
+       Attributes:
+               comm: Thread command-line (name).
+               pid: process ID of containing process.
+               tid: thread ID.
+               samples: Timeline of profile samples.
+               frameTable: interned stack frame ID -> stack frame.
+               stringTable: interned string ID -> string.
+               stringMap: interned string -> string ID.
+               stackTable: interned stack ID -> stack.
+               stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
+               frameMap: Stack Frame string -> interned Frame ID.
+               comm: str
+               pid: int
+               tid: int
+               samples: List[Sample] = field(default_factory=list)
+               frameTable: List[Frame] = field(default_factory=list)
+               stringTable: List[str] = field(default_factory=list)
+               stringMap: Dict[str, int] = field(default_factory=dict)
+               stackTable: List[Stack] = field(default_factory=list)
+               stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
+               frameMap: Dict[str, int] = field(default_factory=dict)
+       """
+       comm: str
+       pid: int
+       tid: int
+       samples: List[Sample] = field(default_factory=list)
+       frameTable: List[Frame] = field(default_factory=list)
+       stringTable: List[str] = field(default_factory=list)
+       stringMap: Dict[str, int] = field(default_factory=dict)
+       stackTable: List[Stack] = field(default_factory=list)
+       stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
+       frameMap: Dict[str, int] = field(default_factory=dict)
+
+       def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
+               """Gets a matching stack, or saves the new stack. Returns a Stack ID."""
+               key = f"{frame_id}" if prefix_id is None else f"{frame_id},{prefix_id}"
+               # key = (prefix_id, frame_id)
+               stack_id = self.stackMap.get(key)
+               if stack_id is None:
+                       # return stack_id
+                       stack_id = len(self.stackTable)
+                       self.stackTable.append(Stack(prefix_id=prefix_id, frame_id=frame_id))
+                       self.stackMap[key] = stack_id
+               return stack_id
+
+       def _intern_string(self, string: str) -> int:
+               """Gets a matching string, or saves the new string. Returns a String ID."""
+               string_id = self.stringMap.get(string)
+               if string_id is not None:
+                       return string_id
+               string_id = len(self.stringTable)
+               self.stringTable.append(string)
+               self.stringMap[string] = string_id
+               return string_id
+
+       def _intern_frame(self, frame_str: str) -> int:
+               """Gets a matching stack frame, or saves the new frame. Returns a Frame ID."""
+               frame_id = self.frameMap.get(frame_str)
+               if frame_id is not None:
+                       return frame_id
+               frame_id = len(self.frameTable)
+               self.frameMap[frame_str] = frame_id
+               string_id = self._intern_string(frame_str)
+
+               symbol_name_to_category = KERNEL_CATEGORY_INDEX if frame_str.find('kallsyms') != -1 \
+               or frame_str.find('/vmlinux') != -1 \
+               or frame_str.endswith('.ko)') \
+               else USER_CATEGORY_INDEX
+
+               self.frameTable.append(Frame(
+                       string_id=string_id,
+                       relevantForJS=False,
+                       innerWindowID=0,
+                       implementation=None,
+                       optimizations=None,
+                       line=None,
+                       column=None,
+                       category=symbol_name_to_category,
+                       subcategory=None,
+               ))
+               return frame_id
+
+       def _add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None:
+               """Add a timestamped stack trace sample to the thread builder.
+               Args:
+                       comm: command-line (name) of the thread at this sample
+                       stack: sampled stack frames. Root first, leaf last.
+                       time_ms: timestamp of sample in milliseconds.
+               """
+               # Threads may not set their names right after they are created.
+               # Instead, they might do it later. In such situations, use the latest name they have set.
+               if self.comm != comm:
+                       self.comm = comm
+
+               prefix_stack_id = reduce(lambda prefix_id, frame: self._intern_stack
+                                               (self._intern_frame(frame), prefix_id), stack, None)
+               if prefix_stack_id is not None:
+                       self.samples.append(Sample(stack_id=prefix_stack_id,
+                                                                       time_ms=time_ms,
+                                                                       responsiveness=0))
+
+       def _to_json_dict(self) -> Dict:
+               """Converts current Thread to GeckoThread JSON format."""
+               # Gecko profile format is row-oriented data as List[List],
+               # And a schema for interpreting each index.
+               # Schema:
+               # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
+               # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
+               return {
+                       "tid": self.tid,
+                       "pid": self.pid,
+                       "name": self.comm,
+                       # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
+                       "markers": {
+                               "schema": {
+                                       "name": 0,
+                                       "startTime": 1,
+                                       "endTime": 2,
+                                       "phase": 3,
+                                       "category": 4,
+                                       "data": 5,
+                               },
+                               "data": [],
+                       },
+
+                       # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
+                       "samples": {
+                               "schema": {
+                                       "stack": 0,
+                                       "time": 1,
+                                       "responsiveness": 2,
+                               },
+                               "data": self.samples
+                       },
+
+                       # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
+                       "frameTable": {
+                               "schema": {
+                                       "location": 0,
+                                       "relevantForJS": 1,
+                                       "innerWindowID": 2,
+                                       "implementation": 3,
+                                       "optimizations": 4,
+                                       "line": 5,
+                                       "column": 6,
+                                       "category": 7,
+                                       "subcategory": 8,
+                               },
+                               "data": self.frameTable,
+                       },
+
+                       # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
+                       "stackTable": {
+                               "schema": {
+                                       "prefix": 0,
+                                       "frame": 1,
+                               },
+                               "data": self.stackTable,
+                       },
+                       "stringTable": self.stringTable,
+                       "registerTime": 0,
+                       "unregisterTime": None,
+                       "processType": "default",
+               }
+
+# Uses perf script python interface to parse each
+# event and store the data in the thread builder.
+def process_event(param_dict: Dict) -> None:
+       global start_time
+       global tid_to_thread
+       time_stamp = (param_dict['sample']['time'] // 1000) / 1000
+       pid = param_dict['sample']['pid']
+       tid = param_dict['sample']['tid']
+       comm = param_dict['comm']
+
+       # Start time is the time of the first sample
+       if not start_time:
+               start_time = time_stamp
+
+       # Parse and append the callchain of the current sample into a stack.
+       stack = []
+       if param_dict['callchain']:
+               for call in param_dict['callchain']:
+                       if 'sym' not in call:
+                               continue
+                       stack.append(f'{call["sym"]["name"]} (in {call["dso"]})')
+               if len(stack) != 0:
+                       # Reverse the stack so that the root comes first and the leaf last.
+                       stack = stack[::-1]
+
+       # During perf record if -g is not used, the callchain is not available.
+       # In that case, the symbol and dso are available in the event parameters.
+       else:
+               func = param_dict['symbol'] if 'symbol' in param_dict else '[unknown]'
+               dso = param_dict['dso'] if 'dso' in param_dict else '[unknown]'
+               stack.append(f'{func} (in {dso})')
+
+       # Add sample to the specific thread.
+       thread = tid_to_thread.get(tid)
+       if thread is None:
+               thread = Thread(comm=comm, pid=pid, tid=tid)
+               tid_to_thread[tid] = thread
+       thread._add_sample(comm=comm, stack=stack, time_ms=time_stamp)
+
+def trace_begin() -> None:
+       global output_file
+       if (output_file is None):
+               print("Staring Firefox Profiler on your default browser...")
+               global http_server_thread
+               http_server_thread = threading.Thread(target=test, args=(CORSRequestHandler, HTTPServer,))
+               http_server_thread.daemon = True
+               http_server_thread.start()
+
+# trace_end runs at the end and is used to aggregate
+# the data into the final JSON object and write it out to the output file.
+def trace_end() -> None:
+       global output_file
+       threads = [thread._to_json_dict() for thread in tid_to_thread.values()]
+
+       # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
+       gecko_profile_with_meta = {
+               "meta": {
+                       "interval": 1,
+                       "processType": 0,
+                       "product": PRODUCT,
+                       "stackwalk": 1,
+                       "debug": 0,
+                       "gcpoison": 0,
+                       "asyncstack": 1,
+                       "startTime": start_time,
+                       "shutdownTime": None,
+                       "version": 24,
+                       "presymbolicated": True,
+                       "categories": CATEGORIES,
+                       "markerSchema": [],
+                       },
+               "libs": [],
+               "threads": threads,
+               "processes": [],
+               "pausedRanges": [],
+       }
+       # Launch the profiler on localhost if --save-only was not specified; otherwise only write the profile to the given file.
+       if (output_file is None):
+               output_file = 'gecko_profile.json'
+               with open(output_file, 'w') as f:
+                       json.dump(gecko_profile_with_meta, f, indent=2)
+               launchFirefox(output_file)
+               time.sleep(1)
+               print(f'[ perf gecko: Captured and wrote into {output_file} ]')
+       else:
+               print(f'[ perf gecko: Captured and wrote into {output_file} ]')
+               with open(output_file, 'w') as f:
+                       json.dump(gecko_profile_with_meta, f, indent=2)
+
+# Used to enable Cross-Origin Resource Sharing (CORS) for requests coming from 'https://profiler.firefox.com', allowing it to access resources from this server.
+class CORSRequestHandler(SimpleHTTPRequestHandler):
+       def end_headers (self):
+               self.send_header('Access-Control-Allow-Origin', 'https://profiler.firefox.com')
+               SimpleHTTPRequestHandler.end_headers(self)
+
+# Open profiler.firefox.com in the default browser, pointing it at the profile file served from http://localhost:8000/.
+def launchFirefox(file):
+       safe_string = urllib.parse.quote_plus(f'http://localhost:8000/{file}')
+       url = 'https://profiler.firefox.com/from-url/' + safe_string
+       webbrowser.open(f'{url}')
+
+def main() -> None:
+       global output_file
+       global CATEGORIES
+       parser = argparse.ArgumentParser(description="Convert perf.data to Firefox\'s Gecko Profile format which can be uploaded to profiler.firefox.com for visualization")
+
+       # Add the command-line options
+       # Colors must be defined according to this:
+       # https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
+       parser.add_argument('--user-color', default='yellow', help='Color for the User category', choices=['yellow', 'blue', 'purple', 'green', 'orange', 'red', 'grey', 'magenta'])
+       parser.add_argument('--kernel-color', default='orange', help='Color for the Kernel category', choices=['yellow', 'blue', 'purple', 'green', 'orange', 'red', 'grey', 'magenta'])
+       # If --save-only is specified, the output will be saved to a file instead of opening Firefox's profiler directly.
+       parser.add_argument('--save-only', help='Save the output to a file instead of opening Firefox\'s profiler')
+
+       # Parse the command-line arguments
+       args = parser.parse_args()
+       # Access the values provided by the user
+       user_color = args.user_color
+       kernel_color = args.kernel_color
+       output_file = args.save_only
+
+       CATEGORIES = [
+               {
+                       "name": 'User',
+                       "color": user_color,
+                       "subcategories": ['Other']
+               },
+               {
+                       "name": 'Kernel',
+                       "color": kernel_color,
+                       "subcategories": ['Other']
+               },
+       ]
+
+if __name__ == '__main__':
+       main()
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
deleted file mode 100644 (file)
index d053b32..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-llvm-src-base.c
-llvm-src-kbuild.c
-llvm-src-prologue.c
-llvm-src-relocation.c
index fb9ac5d..63d5e6d 100644 (file)
@@ -37,8 +37,6 @@ perf-y += sample-parsing.o
 perf-y += parse-no-sample-id-all.o
 perf-y += kmod-path.o
 perf-y += thread-map.o
-perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
-perf-y += bpf.o
 perf-y += topology.o
 perf-y += mem.o
 perf-y += cpumap.o
@@ -51,7 +49,6 @@ perf-y += sdt.o
 perf-y += is_printable_array.o
 perf-y += bitmap.o
 perf-y += perf-hooks.o
-perf-y += clang.o
 perf-y += unit_number__scnprintf.o
 perf-y += mem2node.o
 perf-y += maps.o
@@ -70,34 +67,6 @@ perf-y += sigtrap.o
 perf-y += event_groups.o
 perf-y += symbols.o
 
-$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
-       $(call rule_mkdir)
-       $(Q)echo '#include <tests/llvm.h>' > $@
-       $(Q)echo 'const char test_llvm__bpf_base_prog[] =' >> $@
-       $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
-       $(Q)echo ';' >> $@
-
-$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c tests/Build
-       $(call rule_mkdir)
-       $(Q)echo '#include <tests/llvm.h>' > $@
-       $(Q)echo 'const char test_llvm__bpf_test_kbuild_prog[] =' >> $@
-       $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
-       $(Q)echo ';' >> $@
-
-$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build
-       $(call rule_mkdir)
-       $(Q)echo '#include <tests/llvm.h>' > $@
-       $(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@
-       $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
-       $(Q)echo ';' >> $@
-
-$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build
-       $(call rule_mkdir)
-       $(Q)echo '#include <tests/llvm.h>' > $@
-       $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@
-       $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
-       $(Q)echo ';' >> $@
-
 ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c
deleted file mode 100644 (file)
index b638cc9..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * bpf-script-example.c
- * Test basic LLVM building
- */
-#ifndef LINUX_VERSION_CODE
-# error Need LINUX_VERSION_CODE
-# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
-#endif
-#define BPF_ANY 0
-#define BPF_MAP_TYPE_ARRAY 2
-#define BPF_FUNC_map_lookup_elem 1
-#define BPF_FUNC_map_update_elem 2
-
-static void *(*bpf_map_lookup_elem)(void *map, void *key) =
-       (void *) BPF_FUNC_map_lookup_elem;
-static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
-       (void *) BPF_FUNC_map_update_elem;
-
-/*
- * Following macros are taken from tools/lib/bpf/bpf_helpers.h,
- * and are used to create BTF defined maps. It is easier to take
- * 2 simple macros, than being able to include above header in
- * runtime.
- *
- * __uint - defines integer attribute of BTF map definition,
- * Such attributes are represented using a pointer to an array,
- * in which dimensionality of array encodes specified integer
- * value.
- *
- * __type - defines pointer variable with typeof(val) type for
- * attributes like key or value, which will be defined by the
- * size of the type.
- */
-#define __uint(name, val) int (*name)[val]
-#define __type(name, val) typeof(val) *name
-
-#define SEC(NAME) __attribute__((section(NAME), used))
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __uint(max_entries, 1);
-       __type(key, int);
-       __type(value, int);
-} flip_table SEC(".maps");
-
-SEC("syscalls:sys_enter_epoll_pwait")
-int bpf_func__SyS_epoll_pwait(void *ctx)
-{
-       int ind =0;
-       int *flag = bpf_map_lookup_elem(&flip_table, &ind);
-       int new_flag;
-       if (!flag)
-               return 0;
-       /* flip flag and store back */
-       new_flag = !*flag;
-       bpf_map_update_elem(&flip_table, &ind, &new_flag, BPF_ANY);
-       return new_flag;
-}
-char _license[] SEC("license") = "GPL";
-int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c
deleted file mode 100644 (file)
index 219673a..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * bpf-script-test-kbuild.c
- * Test include from kernel header
- */
-#ifndef LINUX_VERSION_CODE
-# error Need LINUX_VERSION_CODE
-# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
-#endif
-#define SEC(NAME) __attribute__((section(NAME), used))
-
-#include <uapi/linux/fs.h>
-
-SEC("func=vfs_llseek")
-int bpf_func__vfs_llseek(void *ctx)
-{
-       return 0;
-}
-
-char _license[] SEC("license") = "GPL";
-int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
deleted file mode 100644 (file)
index 91778b5..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * bpf-script-test-prologue.c
- * Test BPF prologue
- */
-#ifndef LINUX_VERSION_CODE
-# error Need LINUX_VERSION_CODE
-# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
-#endif
-#define SEC(NAME) __attribute__((section(NAME), used))
-
-#include <uapi/linux/fs.h>
-
-/*
- * If CONFIG_PROFILE_ALL_BRANCHES is selected,
- * 'if' is redefined after include kernel header.
- * Recover 'if' for BPF object code.
- */
-#ifdef if
-# undef if
-#endif
-
-typedef unsigned int __bitwise fmode_t;
-
-#define FMODE_READ             0x1
-#define FMODE_WRITE            0x2
-
-static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
-       (void *) 6;
-
-SEC("func=null_lseek file->f_mode offset orig")
-int bpf_func__null_lseek(void *ctx, int err, unsigned long _f_mode,
-                        unsigned long offset, unsigned long orig)
-{
-       fmode_t f_mode = (fmode_t)_f_mode;
-
-       if (err)
-               return 0;
-       if (f_mode & FMODE_WRITE)
-               return 0;
-       if (offset & 1)
-               return 0;
-       if (orig == SEEK_CUR)
-               return 0;
-       return 1;
-}
-
-char _license[] SEC("license") = "GPL";
-int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c
deleted file mode 100644 (file)
index 74006e4..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * bpf-script-test-relocation.c
- * Test BPF loader checking relocation
- */
-#ifndef LINUX_VERSION_CODE
-# error Need LINUX_VERSION_CODE
-# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig'
-#endif
-#define BPF_ANY 0
-#define BPF_MAP_TYPE_ARRAY 2
-#define BPF_FUNC_map_lookup_elem 1
-#define BPF_FUNC_map_update_elem 2
-
-static void *(*bpf_map_lookup_elem)(void *map, void *key) =
-       (void *) BPF_FUNC_map_lookup_elem;
-static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
-       (void *) BPF_FUNC_map_update_elem;
-
-struct bpf_map_def {
-       unsigned int type;
-       unsigned int key_size;
-       unsigned int value_size;
-       unsigned int max_entries;
-};
-
-#define SEC(NAME) __attribute__((section(NAME), used))
-struct bpf_map_def SEC("maps") my_table = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 1,
-};
-
-int this_is_a_global_val;
-
-SEC("func=sys_write")
-int bpf_func__sys_write(void *ctx)
-{
-       int key = 0;
-       int value = 0;
-
-       /*
-        * Incorrect relocation. Should not allow this program be
-        * loaded into kernel.
-        */
-       bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0);
-       return 0;
-}
-char _license[] SEC("license") = "GPL";
-int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
deleted file mode 100644 (file)
index 8beb460..0000000
+++ /dev/null
@@ -1,389 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/epoll.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <util/record.h>
-#include <util/util.h>
-#include <util/bpf-loader.h>
-#include <util/evlist.h>
-#include <linux/filter.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <api/fs/fs.h>
-#include <perf/mmap.h>
-#include "tests.h"
-#include "llvm.h"
-#include "debug.h"
-#include "parse-events.h"
-#include "util/mmap.h"
-#define NR_ITERS       111
-#define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test"
-
-#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-
-static int epoll_pwait_loop(void)
-{
-       int i;
-
-       /* Should fail NR_ITERS times */
-       for (i = 0; i < NR_ITERS; i++)
-               epoll_pwait(-(i + 1), NULL, 0, 0, NULL);
-       return 0;
-}
-
-#ifdef HAVE_BPF_PROLOGUE
-
-static int llseek_loop(void)
-{
-       int fds[2], i;
-
-       fds[0] = open("/dev/null", O_RDONLY);
-       fds[1] = open("/dev/null", O_RDWR);
-
-       if (fds[0] < 0 || fds[1] < 0)
-               return -1;
-
-       for (i = 0; i < NR_ITERS; i++) {
-               lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
-               lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
-       }
-       close(fds[0]);
-       close(fds[1]);
-       return 0;
-}
-
-#endif
-
-static struct {
-       enum test_llvm__testcase prog_id;
-       const char *name;
-       const char *msg_compile_fail;
-       const char *msg_load_fail;
-       int (*target_func)(void);
-       int expect_result;
-       bool    pin;
-} bpf_testcase_table[] = {
-       {
-               .prog_id          = LLVM_TESTCASE_BASE,
-               .name             = "[basic_bpf_test]",
-               .msg_compile_fail = "fix 'perf test LLVM' first",
-               .msg_load_fail    = "load bpf object failed",
-               .target_func      = &epoll_pwait_loop,
-               .expect_result    = (NR_ITERS + 1) / 2,
-       },
-       {
-               .prog_id          = LLVM_TESTCASE_BASE,
-               .name             = "[bpf_pinning]",
-               .msg_compile_fail = "fix kbuild first",
-               .msg_load_fail    = "check your vmlinux setting?",
-               .target_func      = &epoll_pwait_loop,
-               .expect_result    = (NR_ITERS + 1) / 2,
-               .pin              = true,
-       },
-#ifdef HAVE_BPF_PROLOGUE
-       {
-               .prog_id          = LLVM_TESTCASE_BPF_PROLOGUE,
-               .name             = "[bpf_prologue_test]",
-               .msg_compile_fail = "fix kbuild first",
-               .msg_load_fail    = "check your vmlinux setting?",
-               .target_func      = &llseek_loop,
-               .expect_result    = (NR_ITERS + 1) / 4,
-       },
-#endif
-};
-
-static int do_test(struct bpf_object *obj, int (*func)(void),
-                  int expect)
-{
-       struct record_opts opts = {
-               .target = {
-                       .uid = UINT_MAX,
-                       .uses_mmap = true,
-               },
-               .freq         = 0,
-               .mmap_pages   = 256,
-               .default_interval = 1,
-       };
-
-       char pid[16];
-       char sbuf[STRERR_BUFSIZE];
-       struct evlist *evlist;
-       int i, ret = TEST_FAIL, err = 0, count = 0;
-
-       struct parse_events_state parse_state;
-       struct parse_events_error parse_error;
-
-       parse_events_error__init(&parse_error);
-       bzero(&parse_state, sizeof(parse_state));
-       parse_state.error = &parse_error;
-       INIT_LIST_HEAD(&parse_state.list);
-
-       err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL);
-       parse_events_error__exit(&parse_error);
-       if (err == -ENODATA) {
-               pr_debug("Failed to add events selected by BPF, debuginfo package not installed\n");
-               return TEST_SKIP;
-       }
-       if (err || list_empty(&parse_state.list)) {
-               pr_debug("Failed to add events selected by BPF\n");
-               return TEST_FAIL;
-       }
-
-       snprintf(pid, sizeof(pid), "%d", getpid());
-       pid[sizeof(pid) - 1] = '\0';
-       opts.target.tid = opts.target.pid = pid;
-
-       /* Instead of evlist__new_default, don't add default events */
-       evlist = evlist__new();
-       if (!evlist) {
-               pr_debug("Not enough memory to create evlist\n");
-               return TEST_FAIL;
-       }
-
-       err = evlist__create_maps(evlist, &opts.target);
-       if (err < 0) {
-               pr_debug("Not enough memory to create thread/cpu maps\n");
-               goto out_delete_evlist;
-       }
-
-       evlist__splice_list_tail(evlist, &parse_state.list);
-
-       evlist__config(evlist, &opts, NULL);
-
-       err = evlist__open(evlist);
-       if (err < 0) {
-               pr_debug("perf_evlist__open: %s\n",
-                        str_error_r(errno, sbuf, sizeof(sbuf)));
-               goto out_delete_evlist;
-       }
-
-       err = evlist__mmap(evlist, opts.mmap_pages);
-       if (err < 0) {
-               pr_debug("evlist__mmap: %s\n",
-                        str_error_r(errno, sbuf, sizeof(sbuf)));
-               goto out_delete_evlist;
-       }
-
-       evlist__enable(evlist);
-       (*func)();
-       evlist__disable(evlist);
-
-       for (i = 0; i < evlist->core.nr_mmaps; i++) {
-               union perf_event *event;
-               struct mmap *md;
-
-               md = &evlist->mmap[i];
-               if (perf_mmap__read_init(&md->core) < 0)
-                       continue;
-
-               while ((event = perf_mmap__read_event(&md->core)) != NULL) {
-                       const u32 type = event->header.type;
-
-                       if (type == PERF_RECORD_SAMPLE)
-                               count ++;
-               }
-               perf_mmap__read_done(&md->core);
-       }
-
-       if (count != expect * evlist->core.nr_entries) {
-               pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count);
-               goto out_delete_evlist;
-       }
-
-       ret = TEST_OK;
-
-out_delete_evlist:
-       evlist__delete(evlist);
-       return ret;
-}
-
-static struct bpf_object *
-prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name)
-{
-       struct bpf_object *obj;
-
-       obj = bpf__prepare_load_buffer(obj_buf, obj_buf_sz, name);
-       if (IS_ERR(obj)) {
-               pr_debug("Compile BPF program failed.\n");
-               return NULL;
-       }
-       return obj;
-}
-
-static int __test__bpf(int idx)
-{
-       int ret;
-       void *obj_buf;
-       size_t obj_buf_sz;
-       struct bpf_object *obj;
-
-       ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
-                                      bpf_testcase_table[idx].prog_id,
-                                      false, NULL);
-       if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
-               pr_debug("Unable to get BPF object, %s\n",
-                        bpf_testcase_table[idx].msg_compile_fail);
-               if ((idx == 0) || (ret == TEST_SKIP))
-                       return TEST_SKIP;
-               else
-                       return TEST_FAIL;
-       }
-
-       obj = prepare_bpf(obj_buf, obj_buf_sz,
-                         bpf_testcase_table[idx].name);
-       if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) {
-               if (!obj)
-                       pr_debug("Fail to load BPF object: %s\n",
-                                bpf_testcase_table[idx].msg_load_fail);
-               else
-                       pr_debug("Success unexpectedly: %s\n",
-                                bpf_testcase_table[idx].msg_load_fail);
-               ret = TEST_FAIL;
-               goto out;
-       }
-
-       if (obj) {
-               ret = do_test(obj,
-                             bpf_testcase_table[idx].target_func,
-                             bpf_testcase_table[idx].expect_result);
-               if (ret != TEST_OK)
-                       goto out;
-               if (bpf_testcase_table[idx].pin) {
-                       int err;
-
-                       if (!bpf_fs__mount()) {
-                               pr_debug("BPF filesystem not mounted\n");
-                               ret = TEST_FAIL;
-                               goto out;
-                       }
-                       err = mkdir(PERF_TEST_BPF_PATH, 0777);
-                       if (err && errno != EEXIST) {
-                               pr_debug("Failed to make perf_test dir: %s\n",
-                                        strerror(errno));
-                               ret = TEST_FAIL;
-                               goto out;
-                       }
-                       if (bpf_object__pin(obj, PERF_TEST_BPF_PATH))
-                               ret = TEST_FAIL;
-                       if (rm_rf(PERF_TEST_BPF_PATH))
-                               ret = TEST_FAIL;
-               }
-       }
-
-out:
-       free(obj_buf);
-       bpf__clear();
-       return ret;
-}
-
-static int check_env(void)
-{
-       LIBBPF_OPTS(bpf_prog_load_opts, opts);
-       int err;
-       char license[] = "GPL";
-
-       struct bpf_insn insns[] = {
-               BPF_MOV64_IMM(BPF_REG_0, 1),
-               BPF_EXIT_INSN(),
-       };
-
-       err = fetch_kernel_version(&opts.kern_version, NULL, 0);
-       if (err) {
-               pr_debug("Unable to get kernel version\n");
-               return err;
-       }
-       err = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, license, insns,
-                           ARRAY_SIZE(insns), &opts);
-       if (err < 0) {
-               pr_err("Missing basic BPF support, skip this test: %s\n",
-                      strerror(errno));
-               return err;
-       }
-       close(err);
-
-       return 0;
-}
-
-static int test__bpf(int i)
-{
-       int err;
-
-       if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
-               return TEST_FAIL;
-
-       if (geteuid() != 0) {
-               pr_debug("Only root can run BPF test\n");
-               return TEST_SKIP;
-       }
-
-       if (check_env())
-               return TEST_SKIP;
-
-       err = __test__bpf(i);
-       return err;
-}
-#endif
-
-static int test__basic_bpf_test(struct test_suite *test __maybe_unused,
-                               int subtest __maybe_unused)
-{
-#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
-       return test__bpf(0);
-#else
-       pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n");
-       return TEST_SKIP;
-#endif
-}
-
-static int test__bpf_pinning(struct test_suite *test __maybe_unused,
-                            int subtest __maybe_unused)
-{
-#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
-       return test__bpf(1);
-#else
-       pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n");
-       return TEST_SKIP;
-#endif
-}
-
-static int test__bpf_prologue_test(struct test_suite *test __maybe_unused,
-                                  int subtest __maybe_unused)
-{
-#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE) && defined(HAVE_LIBTRACEEVENT)
-       return test__bpf(2);
-#else
-       pr_debug("Skip BPF test because BPF or libtraceevent support is not compiled\n");
-       return TEST_SKIP;
-#endif
-}
-
-
-static struct test_case bpf_tests[] = {
-#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
-       TEST_CASE("Basic BPF filtering", basic_bpf_test),
-       TEST_CASE_REASON("BPF pinning", bpf_pinning,
-                       "clang isn't installed or environment missing BPF support"),
-#ifdef HAVE_BPF_PROLOGUE
-       TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test,
-                       "clang/debuginfo isn't installed or environment missing BPF support"),
-#else
-       TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
-#endif
-#else
-       TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in or missing libtraceevent support"),
-       TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in or missing libtraceevent support"),
-       TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in or missing libtraceevent support"),
-#endif
-       { .name = NULL, }
-};
-
-struct test_suite suite__bpf = {
-       .desc = "BPF filter",
-       .test_cases = bpf_tests,
-};
index 1f6557c..0ad18cf 100644 (file)
 static bool dont_fork;
 const char *dso_to_test;
 
-struct test_suite *__weak arch_tests[] = {
+/*
+ * List of architecture specific tests. Not a weak symbol as the array length is
+ * dependent on the initialization, as such GCC with LTO complains of
+ * conflicting definitions with a weak symbol.
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__)
+extern struct test_suite *arch_tests[];
+#else
+static struct test_suite *arch_tests[] = {
        NULL,
 };
+#endif
 
 static struct test_suite *generic_tests[] = {
        &suite__vmlinux_matches_kallsyms,
@@ -83,9 +92,7 @@ static struct test_suite *generic_tests[] = {
        &suite__fdarray__add,
        &suite__kmod_path__parse,
        &suite__thread_map,
-       &suite__llvm,
        &suite__session_topology,
-       &suite__bpf,
        &suite__thread_map_synthesize,
        &suite__thread_map_remove,
        &suite__cpu_map,
@@ -99,7 +106,6 @@ static struct test_suite *generic_tests[] = {
        &suite__is_printable_array,
        &suite__bitmap_print,
        &suite__perf_hooks,
-       &suite__clang,
        &suite__unit_number__scnprint,
        &suite__mem2node,
        &suite__time_utils,
diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c
deleted file mode 100644 (file)
index a711100..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "tests.h"
-#include "c++/clang-c.h"
-#include <linux/kernel.h>
-
-#ifndef HAVE_LIBCLANGLLVM_SUPPORT
-static int test__clang_to_IR(struct test_suite *test __maybe_unused,
-                            int subtest __maybe_unused)
-{
-       return TEST_SKIP;
-}
-
-static int test__clang_to_obj(struct test_suite *test __maybe_unused,
-                             int subtest __maybe_unused)
-{
-       return TEST_SKIP;
-}
-#endif
-
-static struct test_case clang_tests[] = {
-       TEST_CASE_REASON("builtin clang compile C source to IR", clang_to_IR,
-                        "not compiled in"),
-       TEST_CASE_REASON("builtin clang compile C source to ELF object",
-                        clang_to_obj,
-                        "not compiled in"),
-       { .name = NULL, }
-};
-
-struct test_suite suite__clang = {
-       .desc = "builtin clang support",
-       .test_cases = clang_tests,
-};
diff --git a/tools/perf/tests/config-fragments/README b/tools/perf/tests/config-fragments/README
new file mode 100644 (file)
index 0000000..fe7de5d
--- /dev/null
@@ -0,0 +1,7 @@
+This folder is for kernel config fragments that can be merged with
+defconfig to give full test coverage of a perf test run. This is only
+an optimistic set as some features require hardware support in order to
+pass and not skip.
+
+'config' is shared across all platforms, and for arch specific files,
+the file name should match that used in the ARCH=... make option.
diff --git a/tools/perf/tests/config-fragments/arm64 b/tools/perf/tests/config-fragments/arm64
new file mode 100644 (file)
index 0000000..64c4ab1
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_CORESIGHT_SOURCE_ETM4X=y
diff --git a/tools/perf/tests/config-fragments/config b/tools/perf/tests/config-fragments/config
new file mode 100644 (file)
index 0000000..c340b31
--- /dev/null
@@ -0,0 +1,11 @@
+CONFIG_TRACEPOINTS=y
+CONFIG_STACKTRACE=y
+CONFIG_NOP_TRACER=y
+CONFIG_RING_BUFFER=y
+CONFIG_EVENT_TRACING=y
+CONFIG_CONTEXT_SWITCH_TRACER=y
+CONFIG_TRACING=y
+CONFIG_GENERIC_TRACER=y
+CONFIG_FTRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BRANCH_PROFILE_NONE=y
index 086fd21..da3a9b5 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Test dlfilter C API. A perf.data file is synthesized and then processed
- * by perf script with a dlfilter named dlfilter-test-api-v0.so. Also a C file
+ * by perf script with dlfilters named dlfilter-test-api-v*.so. Also a C file
  * is compiled to provide a dso to match the synthesized perf.data file.
  */
 
@@ -37,6 +37,8 @@
 
 #define MAP_START 0x400000
 
+#define DLFILTER_TEST_NAME_MAX 128
+
 struct test_data {
        struct perf_tool tool;
        struct machine *machine;
@@ -45,6 +47,8 @@ struct test_data {
        u64 bar;
        u64 ip;
        u64 addr;
+       char name[DLFILTER_TEST_NAME_MAX];
+       char desc[DLFILTER_TEST_NAME_MAX];
        char perf[PATH_MAX];
        char perf_data_file_name[PATH_MAX];
        char c_file_name[PATH_MAX];
@@ -215,7 +219,7 @@ static int write_prog(char *file_name)
        return err ? -1 : 0;
 }
 
-static int get_dlfilters_path(char *buf, size_t sz)
+static int get_dlfilters_path(const char *name, char *buf, size_t sz)
 {
        char perf[PATH_MAX];
        char path[PATH_MAX];
@@ -224,12 +228,12 @@ static int get_dlfilters_path(char *buf, size_t sz)
 
        perf_exe(perf, sizeof(perf));
        perf_path = dirname(perf);
-       snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", perf_path);
+       snprintf(path, sizeof(path), "%s/dlfilters/%s", perf_path, name);
        if (access(path, R_OK)) {
                exec_path = get_argv_exec_path();
                if (!exec_path)
                        return -1;
-               snprintf(path, sizeof(path), "%s/dlfilters/dlfilter-test-api-v0.so", exec_path);
+               snprintf(path, sizeof(path), "%s/dlfilters/%s", exec_path, name);
                free(exec_path);
                if (access(path, R_OK))
                        return -1;
@@ -244,9 +248,9 @@ static int check_filter_desc(struct test_data *td)
        char *desc = NULL;
        int ret;
 
-       if (get_filter_desc(td->dlfilters, "dlfilter-test-api-v0.so", &desc, &long_desc) &&
+       if (get_filter_desc(td->dlfilters, td->name, &desc, &long_desc) &&
            long_desc && !strcmp(long_desc, "Filter used by the 'dlfilter C API' perf test") &&
-           desc && !strcmp(desc, "dlfilter to test v0 C API"))
+           desc && !strcmp(desc, td->desc))
                ret = 0;
        else
                ret = -1;
@@ -284,7 +288,7 @@ static int get_ip_addr(struct test_data *td)
 static int do_run_perf_script(struct test_data *td, int do_early)
 {
        return system_cmd("%s script -i %s "
-                         "--dlfilter %s/dlfilter-test-api-v0.so "
+                         "--dlfilter %s/%s "
                          "--dlarg first "
                          "--dlarg %d "
                          "--dlarg %" PRIu64 " "
@@ -292,7 +296,7 @@ static int do_run_perf_script(struct test_data *td, int do_early)
                          "--dlarg %d "
                          "--dlarg last",
                          td->perf, td->perf_data_file_name, td->dlfilters,
-                         verbose, td->ip, td->addr, do_early);
+                         td->name, verbose, td->ip, td->addr, do_early);
 }
 
 static int run_perf_script(struct test_data *td)
@@ -321,7 +325,7 @@ static int test__dlfilter_test(struct test_data *td)
        u64 id = 99;
        int err;
 
-       if (get_dlfilters_path(td->dlfilters, PATH_MAX))
+       if (get_dlfilters_path(td->name, td->dlfilters, PATH_MAX))
                return test_result("dlfilters not found", TEST_SKIP);
 
        if (check_filter_desc(td))
@@ -399,14 +403,18 @@ static void test_data__free(struct test_data *td)
        }
 }
 
-static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+static int test__dlfilter_ver(int ver)
 {
        struct test_data td = {.fd = -1};
        int pid = getpid();
        int err;
 
+       pr_debug("\n-- Testing version %d API --\n", ver);
+
        perf_exe(td.perf, sizeof(td.perf));
 
+       snprintf(td.name, sizeof(td.name), "dlfilter-test-api-v%d.so", ver);
+       snprintf(td.desc, sizeof(td.desc), "dlfilter to test v%d C API", ver);
        snprintf(td.perf_data_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-perf-data", pid);
        snprintf(td.c_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog.c", pid);
        snprintf(td.prog_file_name, PATH_MAX, "/tmp/dlfilter-test-%u-prog", pid);
@@ -416,4 +424,14 @@ static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __
        return err;
 }
 
+static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+{
+       int err = test__dlfilter_ver(0);
+
+       if (err)
+               return err;
+       /* No test for version 1 */
+       return test__dlfilter_ver(2);
+}
+
 DEFINE_SUITE("dlfilter C API", dlfilter);
index c1c3fcb..81229fa 100644 (file)
@@ -70,7 +70,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 {
        struct expr_id_data *val_ptr;
        const char *p;
-       double val, num_cpus, num_cores, num_dies, num_packages;
+       double val, num_cpus_online, num_cpus, num_cores, num_dies, num_packages;
        int ret;
        struct expr_parse_ctx *ctx;
        bool is_intel = false;
@@ -227,7 +227,10 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 
        /* Test toplogy constants appear well ordered. */
        expr__ctx_clear(ctx);
+       TEST_ASSERT_VAL("#num_cpus_online",
+                       expr__parse(&num_cpus_online, ctx, "#num_cpus_online") == 0);
        TEST_ASSERT_VAL("#num_cpus", expr__parse(&num_cpus, ctx, "#num_cpus") == 0);
+       TEST_ASSERT_VAL("#num_cpus >= #num_cpus_online", num_cpus >= num_cpus_online);
        TEST_ASSERT_VAL("#num_cores", expr__parse(&num_cores, ctx, "#num_cores") == 0);
        TEST_ASSERT_VAL("#num_cpus >= #num_cores", num_cpus >= num_cores);
        TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0);
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
deleted file mode 100644 (file)
index 0bc25a5..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "tests.h"
-#include "debug.h"
-
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/libbpf.h>
-#include <util/llvm-utils.h>
-#include "llvm.h"
-static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
-{
-       struct bpf_object *obj;
-
-       obj = bpf_object__open_mem(obj_buf, obj_buf_sz, NULL);
-       if (libbpf_get_error(obj))
-               return TEST_FAIL;
-       bpf_object__close(obj);
-       return TEST_OK;
-}
-
-static struct {
-       const char *source;
-       const char *desc;
-       bool should_load_fail;
-} bpf_source_table[__LLVM_TESTCASE_MAX] = {
-       [LLVM_TESTCASE_BASE] = {
-               .source = test_llvm__bpf_base_prog,
-               .desc = "Basic BPF llvm compile",
-       },
-       [LLVM_TESTCASE_KBUILD] = {
-               .source = test_llvm__bpf_test_kbuild_prog,
-               .desc = "kbuild searching",
-       },
-       [LLVM_TESTCASE_BPF_PROLOGUE] = {
-               .source = test_llvm__bpf_test_prologue_prog,
-               .desc = "Compile source for BPF prologue generation",
-       },
-       [LLVM_TESTCASE_BPF_RELOCATION] = {
-               .source = test_llvm__bpf_test_relocation,
-               .desc = "Compile source for BPF relocation",
-               .should_load_fail = true,
-       },
-};
-
-int
-test_llvm__fetch_bpf_obj(void **p_obj_buf,
-                        size_t *p_obj_buf_sz,
-                        enum test_llvm__testcase idx,
-                        bool force,
-                        bool *should_load_fail)
-{
-       const char *source;
-       const char *desc;
-       const char *tmpl_old, *clang_opt_old;
-       char *tmpl_new = NULL, *clang_opt_new = NULL;
-       int err, old_verbose, ret = TEST_FAIL;
-
-       if (idx >= __LLVM_TESTCASE_MAX)
-               return TEST_FAIL;
-
-       source = bpf_source_table[idx].source;
-       desc = bpf_source_table[idx].desc;
-       if (should_load_fail)
-               *should_load_fail = bpf_source_table[idx].should_load_fail;
-
-       /*
-        * Skip this test if user's .perfconfig doesn't set [llvm] section
-        * and clang is not found in $PATH
-        */
-       if (!force && (!llvm_param.user_set_param &&
-                      llvm__search_clang())) {
-               pr_debug("No clang, skip this test\n");
-               return TEST_SKIP;
-       }
-
-       /*
-        * llvm is verbosity when error. Suppress all error output if
-        * not 'perf test -v'.
-        */
-       old_verbose = verbose;
-       if (verbose == 0)
-               verbose = -1;
-
-       *p_obj_buf = NULL;
-       *p_obj_buf_sz = 0;
-
-       if (!llvm_param.clang_bpf_cmd_template)
-               goto out;
-
-       if (!llvm_param.clang_opt)
-               llvm_param.clang_opt = strdup("");
-
-       err = asprintf(&tmpl_new, "echo '%s' | %s%s", source,
-                      llvm_param.clang_bpf_cmd_template,
-                      old_verbose ? "" : " 2>/dev/null");
-       if (err < 0)
-               goto out;
-       err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt);
-       if (err < 0)
-               goto out;
-
-       tmpl_old = llvm_param.clang_bpf_cmd_template;
-       llvm_param.clang_bpf_cmd_template = tmpl_new;
-       clang_opt_old = llvm_param.clang_opt;
-       llvm_param.clang_opt = clang_opt_new;
-
-       err = llvm__compile_bpf("-", p_obj_buf, p_obj_buf_sz);
-
-       llvm_param.clang_bpf_cmd_template = tmpl_old;
-       llvm_param.clang_opt = clang_opt_old;
-
-       verbose = old_verbose;
-       if (err)
-               goto out;
-
-       ret = TEST_OK;
-out:
-       free(tmpl_new);
-       free(clang_opt_new);
-       if (ret != TEST_OK)
-               pr_debug("Failed to compile test case: '%s'\n", desc);
-       return ret;
-}
-
-static int test__llvm(int subtest)
-{
-       int ret;
-       void *obj_buf = NULL;
-       size_t obj_buf_sz = 0;
-       bool should_load_fail = false;
-
-       if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
-               return TEST_FAIL;
-
-       ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
-                                      subtest, false, &should_load_fail);
-
-       if (ret == TEST_OK && !should_load_fail) {
-               ret = test__bpf_parsing(obj_buf, obj_buf_sz);
-               if (ret != TEST_OK) {
-                       pr_debug("Failed to parse test case '%s'\n",
-                                bpf_source_table[subtest].desc);
-               }
-       }
-       free(obj_buf);
-
-       return ret;
-}
-#endif //HAVE_LIBBPF_SUPPORT
-
-static int test__llvm__bpf_base_prog(struct test_suite *test __maybe_unused,
-                                    int subtest __maybe_unused)
-{
-#ifdef HAVE_LIBBPF_SUPPORT
-       return test__llvm(LLVM_TESTCASE_BASE);
-#else
-       pr_debug("Skip LLVM test because BPF support is not compiled\n");
-       return TEST_SKIP;
-#endif
-}
-
-static int test__llvm__bpf_test_kbuild_prog(struct test_suite *test __maybe_unused,
-                                           int subtest __maybe_unused)
-{
-#ifdef HAVE_LIBBPF_SUPPORT
-       return test__llvm(LLVM_TESTCASE_KBUILD);
-#else
-       pr_debug("Skip LLVM test because BPF support is not compiled\n");
-       return TEST_SKIP;
-#endif
-}
-
-static int test__llvm__bpf_test_prologue_prog(struct test_suite *test __maybe_unused,
-                                             int subtest __maybe_unused)
-{
-#ifdef HAVE_LIBBPF_SUPPORT
-       return test__llvm(LLVM_TESTCASE_BPF_PROLOGUE);
-#else
-       pr_debug("Skip LLVM test because BPF support is not compiled\n");
-       return TEST_SKIP;
-#endif
-}
-
-static int test__llvm__bpf_test_relocation(struct test_suite *test __maybe_unused,
-                                          int subtest __maybe_unused)
-{
-#ifdef HAVE_LIBBPF_SUPPORT
-       return test__llvm(LLVM_TESTCASE_BPF_RELOCATION);
-#else
-       pr_debug("Skip LLVM test because BPF support is not compiled\n");
-       return TEST_SKIP;
-#endif
-}
-
-
-static struct test_case llvm_tests[] = {
-#ifdef HAVE_LIBBPF_SUPPORT
-       TEST_CASE("Basic BPF llvm compile", llvm__bpf_base_prog),
-       TEST_CASE("kbuild searching", llvm__bpf_test_kbuild_prog),
-       TEST_CASE("Compile source for BPF prologue generation",
-                 llvm__bpf_test_prologue_prog),
-       TEST_CASE("Compile source for BPF relocation", llvm__bpf_test_relocation),
-#else
-       TEST_CASE_REASON("Basic BPF llvm compile", llvm__bpf_base_prog, "not compiled in"),
-       TEST_CASE_REASON("kbuild searching", llvm__bpf_test_kbuild_prog, "not compiled in"),
-       TEST_CASE_REASON("Compile source for BPF prologue generation",
-                       llvm__bpf_test_prologue_prog, "not compiled in"),
-       TEST_CASE_REASON("Compile source for BPF relocation",
-                       llvm__bpf_test_relocation, "not compiled in"),
-#endif
-       { .name = NULL, }
-};
-
-struct test_suite suite__llvm = {
-       .desc = "LLVM search and compile",
-       .test_cases = llvm_tests,
-};
diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h
deleted file mode 100644 (file)
index f68b0d9..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PERF_TEST_LLVM_H
-#define PERF_TEST_LLVM_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stddef.h> /* for size_t */
-#include <stdbool.h> /* for bool */
-
-extern const char test_llvm__bpf_base_prog[];
-extern const char test_llvm__bpf_test_kbuild_prog[];
-extern const char test_llvm__bpf_test_prologue_prog[];
-extern const char test_llvm__bpf_test_relocation[];
-
-enum test_llvm__testcase {
-       LLVM_TESTCASE_BASE,
-       LLVM_TESTCASE_KBUILD,
-       LLVM_TESTCASE_BPF_PROLOGUE,
-       LLVM_TESTCASE_BPF_RELOCATION,
-       __LLVM_TESTCASE_MAX,
-};
-
-int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz,
-                            enum test_llvm__testcase index, bool force,
-                            bool *should_load_fail);
-#ifdef __cplusplus
-}
-#endif
-#endif
index 58cf96d..ea4c341 100644 (file)
@@ -95,7 +95,6 @@ make_with_babeltrace:= LIBBABELTRACE=1
 make_with_coresight := CORESIGHT=1
 make_no_sdt        := NO_SDT=1
 make_no_syscall_tbl := NO_SYSCALL_TABLE=1
-make_with_clangllvm := LIBCLANGLLVM=1
 make_no_libpfm4     := NO_LIBPFM4=1
 make_with_gtk2      := GTK2=1
 make_refcnt_check   := EXTRA_CFLAGS="-DREFCNT_CHECKING=1"
index 658fb95..d47f1f8 100644 (file)
@@ -2170,7 +2170,7 @@ static const struct evlist_test test__events[] = {
 
 static const struct evlist_test test__events_pmu[] = {
        {
-               .name  = "cpu/config=10,config1,config2=3,period=1000/u",
+               .name  = "cpu/config=10,config1=1,config2=3,period=1000/u",
                .valid = test__pmu_cpu_valid,
                .check = test__checkevent_pmu,
                /* 0 */
@@ -2472,7 +2472,7 @@ static int test_term(const struct terms_test *t)
 
        INIT_LIST_HEAD(&terms);
 
-       ret = parse_events_terms(&terms, t->str);
+       ret = parse_events_terms(&terms, t->str, /*input=*/ NULL);
        if (ret) {
                pr_debug("failed to parse terms '%s', err %d\n",
                         t->str , ret);
index 64383fc..f5321fb 100644 (file)
@@ -44,6 +44,7 @@ struct perf_pmu_test_pmu {
 
 static const struct perf_pmu_test_event bp_l1_btb_correct = {
        .event = {
+               .pmu = "default_core",
                .name = "bp_l1_btb_correct",
                .event = "event=0x8a",
                .desc = "L1 BTB Correction",
@@ -55,6 +56,7 @@ static const struct perf_pmu_test_event bp_l1_btb_correct = {
 
 static const struct perf_pmu_test_event bp_l2_btb_correct = {
        .event = {
+               .pmu = "default_core",
                .name = "bp_l2_btb_correct",
                .event = "event=0x8b",
                .desc = "L2 BTB Correction",
@@ -66,6 +68,7 @@ static const struct perf_pmu_test_event bp_l2_btb_correct = {
 
 static const struct perf_pmu_test_event segment_reg_loads_any = {
        .event = {
+               .pmu = "default_core",
                .name = "segment_reg_loads.any",
                .event = "event=0x6,period=200000,umask=0x80",
                .desc = "Number of segment register loads",
@@ -77,6 +80,7 @@ static const struct perf_pmu_test_event segment_reg_loads_any = {
 
 static const struct perf_pmu_test_event dispatch_blocked_any = {
        .event = {
+               .pmu = "default_core",
                .name = "dispatch_blocked.any",
                .event = "event=0x9,period=200000,umask=0x20",
                .desc = "Memory cluster signals to block micro-op dispatch for any reason",
@@ -88,6 +92,7 @@ static const struct perf_pmu_test_event dispatch_blocked_any = {
 
 static const struct perf_pmu_test_event eist_trans = {
        .event = {
+               .pmu = "default_core",
                .name = "eist_trans",
                .event = "event=0x3a,period=200000,umask=0x0",
                .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
@@ -99,6 +104,7 @@ static const struct perf_pmu_test_event eist_trans = {
 
 static const struct perf_pmu_test_event l3_cache_rd = {
        .event = {
+               .pmu = "default_core",
                .name = "l3_cache_rd",
                .event = "event=0x40",
                .desc = "L3 cache access, read",
@@ -123,7 +129,7 @@ static const struct perf_pmu_test_event uncore_hisi_ddrc_flux_wcmd = {
        .event = {
                .name = "uncore_hisi_ddrc.flux_wcmd",
                .event = "event=0x2",
-               .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ",
+               .desc = "DDRC write commands",
                .topic = "uncore",
                .long_desc = "DDRC write commands",
                .pmu = "hisi_sccl,ddrc",
@@ -137,7 +143,7 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = {
        .event = {
                .name = "unc_cbo_xsnp_response.miss_eviction",
                .event = "event=0x22,umask=0x81",
-               .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core. Unit: uncore_cbox ",
+               .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
                .topic = "uncore",
                .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
                .pmu = "uncore_cbox",
@@ -151,7 +157,7 @@ static const struct perf_pmu_test_event uncore_hyphen = {
        .event = {
                .name = "event-hyphen",
                .event = "event=0xe0,umask=0x00",
-               .desc = "UNC_CBO_HYPHEN. Unit: uncore_cbox ",
+               .desc = "UNC_CBO_HYPHEN",
                .topic = "uncore",
                .long_desc = "UNC_CBO_HYPHEN",
                .pmu = "uncore_cbox",
@@ -165,7 +171,7 @@ static const struct perf_pmu_test_event uncore_two_hyph = {
        .event = {
                .name = "event-two-hyph",
                .event = "event=0xc0,umask=0x00",
-               .desc = "UNC_CBO_TWO_HYPH. Unit: uncore_cbox ",
+               .desc = "UNC_CBO_TWO_HYPH",
                .topic = "uncore",
                .long_desc = "UNC_CBO_TWO_HYPH",
                .pmu = "uncore_cbox",
@@ -179,7 +185,7 @@ static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = {
        .event = {
                .name = "uncore_hisi_l3c.rd_hit_cpipe",
                .event = "event=0x7",
-               .desc = "Total read hits. Unit: hisi_sccl,l3c ",
+               .desc = "Total read hits",
                .topic = "uncore",
                .long_desc = "Total read hits",
                .pmu = "hisi_sccl,l3c",
@@ -193,7 +199,7 @@ static const struct perf_pmu_test_event uncore_imc_free_running_cache_miss = {
        .event = {
                .name = "uncore_imc_free_running.cache_miss",
                .event = "event=0x12",
-               .desc = "Total cache misses. Unit: uncore_imc_free_running ",
+               .desc = "Total cache misses",
                .topic = "uncore",
                .long_desc = "Total cache misses",
                .pmu = "uncore_imc_free_running",
@@ -207,7 +213,7 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = {
        .event = {
                .name = "uncore_imc.cache_hits",
                .event = "event=0x34",
-               .desc = "Total cache hits. Unit: uncore_imc ",
+               .desc = "Total cache hits",
                .topic = "uncore",
                .long_desc = "Total cache hits",
                .pmu = "uncore_imc",
@@ -232,13 +238,13 @@ static const struct perf_pmu_test_event sys_ddr_pmu_write_cycles = {
        .event = {
                .name = "sys_ddr_pmu.write_cycles",
                .event = "event=0x2b",
-               .desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ",
+               .desc = "ddr write-cycles event",
                .topic = "uncore",
                .pmu = "uncore_sys_ddr_pmu",
                .compat = "v8",
        },
        .alias_str = "event=0x2b",
-       .alias_long_desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ",
+       .alias_long_desc = "ddr write-cycles event",
        .matching_pmu = "uncore_sys_ddr_pmu",
 };
 
@@ -246,13 +252,13 @@ static const struct perf_pmu_test_event sys_ccn_pmu_read_cycles = {
        .event = {
                .name = "sys_ccn_pmu.read_cycles",
                .event = "config=0x2c",
-               .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ",
+               .desc = "ccn read-cycles event",
                .topic = "uncore",
                .pmu = "uncore_sys_ccn_pmu",
                .compat = "0x01",
        },
        .alias_str = "config=0x2c",
-       .alias_long_desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ",
+       .alias_long_desc = "ccn read-cycles event",
        .matching_pmu = "uncore_sys_ccn_pmu",
 };
 
@@ -341,7 +347,7 @@ static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event
        return 0;
 }
 
-static int compare_alias_to_test_event(struct perf_pmu_alias *alias,
+static int compare_alias_to_test_event(struct pmu_event_info *alias,
                                struct perf_pmu_test_event const *test_event,
                                char const *pmu_name)
 {
@@ -385,8 +391,8 @@ static int compare_alias_to_test_event(struct perf_pmu_alias *alias,
                return -1;
        }
 
-
-       if (!is_same(alias->pmu_name, test_event->event.pmu)) {
+       if (!is_same(alias->pmu_name, test_event->event.pmu) &&
+           !is_same(alias->pmu_name, "default_core")) {
                pr_debug("testing aliases PMU %s: mismatched pmu_name, %s vs %s\n",
                          pmu_name, alias->pmu_name, test_event->event.pmu);
                return -1;
@@ -403,7 +409,7 @@ static int test__pmu_event_table_core_callback(const struct pmu_event *pe,
        struct perf_pmu_test_event const **test_event_table;
        bool found = false;
 
-       if (pe->pmu)
+       if (strcmp(pe->pmu, "default_core"))
                test_event_table = &uncore_events[0];
        else
                test_event_table = &core_events[0];
@@ -477,12 +483,14 @@ static int test__pmu_event_table(struct test_suite *test __maybe_unused,
        if (!table || !sys_event_table)
                return -1;
 
-       err = pmu_events_table_for_each_event(table, test__pmu_event_table_core_callback,
+       err = pmu_events_table__for_each_event(table, /*pmu=*/ NULL,
+                                             test__pmu_event_table_core_callback,
                                              &map_events);
        if (err)
                return err;
 
-       err = pmu_events_table_for_each_event(sys_event_table, test__pmu_event_table_sys_callback,
+       err = pmu_events_table__for_each_event(sys_event_table, /*pmu=*/ NULL,
+                                             test__pmu_event_table_sys_callback,
                                              &map_events);
        if (err)
                return err;
@@ -496,26 +504,30 @@ static int test__pmu_event_table(struct test_suite *test __maybe_unused,
        return 0;
 }
 
-static struct perf_pmu_alias *find_alias(const char *test_event, struct list_head *aliases)
-{
-       struct perf_pmu_alias *alias;
+struct test_core_pmu_event_aliases_cb_args {
+       struct perf_pmu_test_event const *test_event;
+       int *count;
+};
 
-       list_for_each_entry(alias, aliases, list)
-               if (!strcmp(test_event, alias->name))
-                       return alias;
+static int test_core_pmu_event_aliases_cb(void *state, struct pmu_event_info *alias)
+{
+       struct test_core_pmu_event_aliases_cb_args *args = state;
 
-       return NULL;
+       if (compare_alias_to_test_event(alias, args->test_event, alias->pmu->name))
+               return -1;
+       (*args->count)++;
+       pr_debug2("testing aliases core PMU %s: matched event %s\n",
+               alias->pmu_name, alias->name);
+       return 0;
 }
 
 /* Verify aliases are as expected */
-static int __test_core_pmu_event_aliases(char *pmu_name, int *count)
+static int __test_core_pmu_event_aliases(const char *pmu_name, int *count)
 {
        struct perf_pmu_test_event const **test_event_table;
        struct perf_pmu *pmu;
-       LIST_HEAD(aliases);
        int res = 0;
        const struct pmu_events_table *table = find_core_events_table("testarch", "testcpu");
-       struct perf_pmu_alias *a, *tmp;
 
        if (!table)
                return -1;
@@ -526,37 +538,40 @@ static int __test_core_pmu_event_aliases(char *pmu_name, int *count)
        if (!pmu)
                return -1;
 
-       pmu->name = pmu_name;
-
-       pmu_add_cpu_aliases_table(&aliases, pmu, table);
-
+       INIT_LIST_HEAD(&pmu->format);
+       INIT_LIST_HEAD(&pmu->aliases);
+       INIT_LIST_HEAD(&pmu->caps);
+       INIT_LIST_HEAD(&pmu->list);
+       pmu->name = strdup(pmu_name);
+       pmu->is_core = true;
+
+       pmu->events_table = table;
+       pmu_add_cpu_aliases_table(pmu, table);
+       pmu->cpu_aliases_added = true;
+       pmu->sysfs_aliases_loaded = true;
+
+       res = pmu_events_table__find_event(table, pmu, "bp_l1_btb_correct", NULL, NULL);
+       if (res != 0) {
+               pr_debug("Missing test event in test architecture\n");
+               return res;
+       }
        for (; *test_event_table; test_event_table++) {
-               struct perf_pmu_test_event const *test_event = *test_event_table;
-               struct pmu_event const *event = &test_event->event;
-               struct perf_pmu_alias *alias = find_alias(event->name, &aliases);
-
-               if (!alias) {
-                       pr_debug("testing aliases core PMU %s: no alias, alias_table->name=%s\n",
-                                 pmu_name, event->name);
-                       res = -1;
-                       break;
-               }
-
-               if (compare_alias_to_test_event(alias, test_event, pmu_name)) {
-                       res = -1;
-                       break;
-               }
-
-               (*count)++;
-               pr_debug2("testing aliases core PMU %s: matched event %s\n",
-                         pmu_name, alias->name);
+               struct perf_pmu_test_event test_event = **test_event_table;
+               struct pmu_event const *event = &test_event.event;
+               struct test_core_pmu_event_aliases_cb_args args = {
+                       .test_event = &test_event,
+                       .count = count,
+               };
+               int err;
+
+               test_event.event.pmu = pmu_name;
+               err = perf_pmu__find_event(pmu, event->name, &args,
+                                          test_core_pmu_event_aliases_cb);
+               if (err)
+                       res = err;
        }
+       perf_pmu__delete(pmu);
 
-       list_for_each_entry_safe(a, tmp, &aliases, list) {
-               list_del(&a->list);
-               perf_pmu_free_alias(a);
-       }
-       free(pmu);
        return res;
 }
 
@@ -566,20 +581,20 @@ static int __test_uncore_pmu_event_aliases(struct perf_pmu_test_pmu *test_pmu)
        struct perf_pmu_test_event const **table;
        struct perf_pmu *pmu = &test_pmu->pmu;
        const char *pmu_name = pmu->name;
-       struct perf_pmu_alias *a, *tmp, *alias;
        const struct pmu_events_table *events_table;
-       LIST_HEAD(aliases);
        int res = 0;
 
        events_table = find_core_events_table("testarch", "testcpu");
        if (!events_table)
                return -1;
-       pmu_add_cpu_aliases_table(&aliases, pmu, events_table);
-       pmu_add_sys_aliases(&aliases, pmu);
+       pmu->events_table = events_table;
+       pmu_add_cpu_aliases_table(pmu, events_table);
+       pmu->cpu_aliases_added = true;
+       pmu->sysfs_aliases_loaded = true;
+       pmu_add_sys_aliases(pmu);
 
        /* Count how many aliases we generated */
-       list_for_each_entry(alias, &aliases, list)
-               alias_count++;
+       alias_count = perf_pmu__num_events(pmu);
 
        /* Count how many aliases we expect from the known table */
        for (table = &test_pmu->aliases[0]; *table; table++)
@@ -588,33 +603,25 @@ static int __test_uncore_pmu_event_aliases(struct perf_pmu_test_pmu *test_pmu)
        if (alias_count != to_match_count) {
                pr_debug("testing aliases uncore PMU %s: mismatch expected aliases (%d) vs found (%d)\n",
                         pmu_name, to_match_count, alias_count);
-               res = -1;
-               goto out;
+               return -1;
        }
 
-       list_for_each_entry(alias, &aliases, list) {
-               bool matched = false;
-
-               for (table = &test_pmu->aliases[0]; *table; table++) {
-                       struct perf_pmu_test_event const *test_event = *table;
-                       struct pmu_event const *event = &test_event->event;
-
-                       if (!strcmp(event->name, alias->name)) {
-                               if (compare_alias_to_test_event(alias,
-                                                       test_event,
-                                                       pmu_name)) {
-                                       continue;
-                               }
-                               matched = true;
-                               matched_count++;
-                       }
-               }
-
-               if (matched == false) {
+       for (table = &test_pmu->aliases[0]; *table; table++) {
+               struct perf_pmu_test_event test_event = **table;
+               struct pmu_event const *event = &test_event.event;
+               int err;
+               struct test_core_pmu_event_aliases_cb_args args = {
+                       .test_event = &test_event,
+                       .count = &matched_count,
+               };
+
+               err = perf_pmu__find_event(pmu, event->name, &args,
+                                          test_core_pmu_event_aliases_cb);
+               if (err) {
+                       res = err;
                        pr_debug("testing aliases uncore PMU %s: could not match alias %s\n",
-                                pmu_name, alias->name);
-                       res = -1;
-                       goto out;
+                                pmu_name, event->name);
+                       return -1;
                }
        }
 
@@ -623,19 +630,13 @@ static int __test_uncore_pmu_event_aliases(struct perf_pmu_test_pmu *test_pmu)
                         pmu_name, matched_count, alias_count);
                res = -1;
        }
-
-out:
-       list_for_each_entry_safe(a, tmp, &aliases, list) {
-               list_del(&a->list);
-               perf_pmu_free_alias(a);
-       }
        return res;
 }
 
 static struct perf_pmu_test_pmu test_pmus[] = {
        {
                .pmu = {
-                       .name = (char *)"hisi_sccl1_ddrc2",
+                       .name = "hisi_sccl1_ddrc2",
                        .is_uncore = 1,
                },
                .aliases = {
@@ -644,7 +645,7 @@ static struct perf_pmu_test_pmu test_pmus[] = {
        },
        {
                .pmu = {
-                       .name = (char *)"uncore_cbox_0",
+                       .name = "uncore_cbox_0",
                        .is_uncore = 1,
                },
                .aliases = {
@@ -655,7 +656,7 @@ static struct perf_pmu_test_pmu test_pmus[] = {
        },
        {
                .pmu = {
-                       .name = (char *)"hisi_sccl3_l3c7",
+                       .name = "hisi_sccl3_l3c7",
                        .is_uncore = 1,
                },
                .aliases = {
@@ -664,7 +665,7 @@ static struct perf_pmu_test_pmu test_pmus[] = {
        },
        {
                .pmu = {
-                       .name = (char *)"uncore_imc_free_running_0",
+                       .name = "uncore_imc_free_running_0",
                        .is_uncore = 1,
                },
                .aliases = {
@@ -673,7 +674,7 @@ static struct perf_pmu_test_pmu test_pmus[] = {
        },
        {
                .pmu = {
-                       .name = (char *)"uncore_imc_0",
+                       .name = "uncore_imc_0",
                        .is_uncore = 1,
                },
                .aliases = {
@@ -682,9 +683,9 @@ static struct perf_pmu_test_pmu test_pmus[] = {
        },
        {
                .pmu = {
-                       .name = (char *)"uncore_sys_ddr_pmu0",
+                       .name = "uncore_sys_ddr_pmu0",
                        .is_uncore = 1,
-                       .id = (char *)"v8",
+                       .id = "v8",
                },
                .aliases = {
                        &sys_ddr_pmu_write_cycles,
@@ -692,9 +693,9 @@ static struct perf_pmu_test_pmu test_pmus[] = {
        },
        {
                .pmu = {
-                       .name = (char *)"uncore_sys_ccn_pmu4",
+                       .name = "uncore_sys_ccn_pmu4",
                        .is_uncore = 1,
-                       .id = (char *)"0x01",
+                       .id = "0x01",
                },
                .aliases = {
                        &sys_ccn_pmu_read_cycles,
@@ -732,8 +733,13 @@ static int test__aliases(struct test_suite *test __maybe_unused,
        }
 
        for (i = 0; i < ARRAY_SIZE(test_pmus); i++) {
-               int res = __test_uncore_pmu_event_aliases(&test_pmus[i]);
+               int res;
+
+               INIT_LIST_HEAD(&test_pmus[i].pmu.format);
+               INIT_LIST_HEAD(&test_pmus[i].pmu.aliases);
+               INIT_LIST_HEAD(&test_pmus[i].pmu.caps);
 
+               res = __test_uncore_pmu_event_aliases(&test_pmus[i]);
                if (res)
                        return res;
        }
index a445263..eb60e5f 100644 (file)
@@ -7,6 +7,7 @@
 #include <stdio.h>
 #include <linux/kernel.h>
 #include <linux/limits.h>
+#include <linux/zalloc.h>
 
 /* Simulated format definitions. */
 static struct test_format {
@@ -27,55 +28,55 @@ static struct test_format {
 /* Simulated users input. */
 static struct parse_events_term test_terms[] = {
        {
-               .config    = (char *) "krava01",
+               .config    = "krava01",
                .val.num   = 15,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
        },
        {
-               .config    = (char *) "krava02",
+               .config    = "krava02",
                .val.num   = 170,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
        },
        {
-               .config    = (char *) "krava03",
+               .config    = "krava03",
                .val.num   = 1,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
        },
        {
-               .config    = (char *) "krava11",
+               .config    = "krava11",
                .val.num   = 27,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
        },
        {
-               .config    = (char *) "krava12",
+               .config    = "krava12",
                .val.num   = 1,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
        },
        {
-               .config    = (char *) "krava13",
+               .config    = "krava13",
                .val.num   = 2,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
        },
        {
-               .config    = (char *) "krava21",
+               .config    = "krava21",
                .val.num   = 119,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
        },
        {
-               .config    = (char *) "krava22",
+               .config    = "krava22",
                .val.num   = 11,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
        },
        {
-               .config    = (char *) "krava23",
+               .config    = "krava23",
                .val.num   = 2,
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = PARSE_EVENTS__TERM_TYPE_USER,
@@ -141,48 +142,55 @@ static struct list_head *test_terms_list(void)
 static int test__pmu(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        char dir[PATH_MAX];
-       char *format = test_format_dir_get(dir, sizeof(dir));
-       LIST_HEAD(formats);
+       char *format;
        struct list_head *terms = test_terms_list();
+       struct perf_event_attr attr;
+       struct perf_pmu *pmu;
+       int fd;
        int ret;
 
-       if (!format)
-               return -EINVAL;
-
-       do {
-               struct perf_event_attr attr;
-               int fd;
-
-               memset(&attr, 0, sizeof(attr));
-
-               fd = open(format, O_DIRECTORY);
-               if (fd < 0) {
-                       ret = fd;
-                       break;
-               }
-               ret = perf_pmu__format_parse(fd, &formats);
-               if (ret)
-                       break;
-
-               ret = perf_pmu__config_terms("perf-pmu-test", &formats, &attr,
-                                            terms, false, NULL);
-               if (ret)
-                       break;
+       pmu = zalloc(sizeof(*pmu));
+       if (!pmu)
+               return -ENOMEM;
 
-               ret = -EINVAL;
+       INIT_LIST_HEAD(&pmu->format);
+       INIT_LIST_HEAD(&pmu->aliases);
+       INIT_LIST_HEAD(&pmu->caps);
+       format = test_format_dir_get(dir, sizeof(dir));
+       if (!format) {
+               free(pmu);
+               return -EINVAL;
+       }
 
-               if (attr.config  != 0xc00000000002a823)
-                       break;
-               if (attr.config1 != 0x8000400000000145)
-                       break;
-               if (attr.config2 != 0x0400000020041d07)
-                       break;
+       memset(&attr, 0, sizeof(attr));
 
-               ret = 0;
-       } while (0);
+       fd = open(format, O_DIRECTORY);
+       if (fd < 0) {
+               ret = fd;
+               goto out;
+       }
 
-       perf_pmu__del_formats(&formats);
+       pmu->name = strdup("perf-pmu-test");
+       ret = perf_pmu__format_parse(pmu, fd, /*eager_load=*/true);
+       if (ret)
+               goto out;
+
+       ret = perf_pmu__config_terms(pmu, &attr, terms, /*zero=*/false, /*err=*/NULL);
+       if (ret)
+               goto out;
+
+       ret = -EINVAL;
+       if (attr.config  != 0xc00000000002a823)
+               goto out;
+       if (attr.config1 != 0x8000400000000145)
+               goto out;
+       if (attr.config2 != 0x0400000020041d07)
+               goto out;
+
+       ret = 0;
+out:
        test_format_dir_put(format);
+       perf_pmu__delete(pmu);
        return ret;
 }
 
index 569e9d4..779bc86 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="asm_pure_loop"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname "$0")"/../lib/coresight.sh
 ARGS=""
 DATV="out"
 DATA="$DATD/perf-$TEST-$DATV.data"
index d21ba85..08a44e5 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="memcpy_thread"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname "$0")"/../lib/coresight.sh
 ARGS="16 10 1"
 DATV="16k_10"
 DATA="$DATD/perf-$TEST-$DATV.data"
index 7c13636..c83a200 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="thread_loop"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname "$0")"/../lib/coresight.sh
 ARGS="10 1"
 DATV="check-tid-10th"
 DATA="$DATD/perf-$TEST-$DATV.data"
index a067145..6346fd5 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="thread_loop"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname "$0")"/../lib/coresight.sh
 ARGS="2 20"
 DATV="check-tid-2th"
 DATA="$DATD/perf-$TEST-$DATV.data"
index f48c852..7304e3d 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="unroll_loop_thread"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname "$0")"/../lib/coresight.sh
 ARGS="10"
 DATV="10"
 DATA="$DATD/perf-$TEST-$DATV.data"
index 51e3f60..5aa6e2e 100644 (file)
@@ -1,3 +1,4 @@
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
index 60c5e34..bf4c1fb 100644 (file)
@@ -1,3 +1,4 @@
+#!/bin/sh
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
 perf probe -l 2>&1 | grep -q probe:vfs_getname
@@ -10,11 +11,11 @@ cleanup_probe_vfs_getname() {
 }
 
 add_probe_vfs_getname() {
-       local verbose=$1
+       add_probe_verbose=$1
        if [ $had_vfs_getname -eq 1 ] ; then
                line=$(perf probe -L getname_flags 2>&1 | grep -E 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
                perf probe -q       "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
-               perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
+               perf probe $add_probe_verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
        fi
 }
 
index 698343f..3cc158a 100644 (file)
@@ -1,3 +1,4 @@
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 # Return true if perf_event_paranoid is > $1 and not running as root.
index e7a3913..bdd5a7c 100644 (file)
@@ -1,3 +1,4 @@
+#!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
 tenths=date\ +%s%1N
index 4a19442..d120e83 100755 (executable)
@@ -21,7 +21,7 @@ trap_cleanup() {
 trap trap_cleanup EXIT TERM INT
 
 check() {
-       if [ `id -u` != 0 ]; then
+       if [ "$(id -u)" != 0 ]; then
                echo "[Skip] No root permission"
                err=2
                exit
@@ -157,10 +157,10 @@ test_lock_filter()
        perf lock contention -i ${perfdata} -L tasklist_lock -q 2> ${result}
 
        # find out the type of tasklist_lock
-       local type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//')
+       test_lock_filter_type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//')
 
-       if [ "$(grep -c -v "${type}" "${result}")" != "0" ]; then
-               echo "[Fail] Recorded result should not have non-${type} locks:" "$(cat "${result}")"
+       if [ "$(grep -c -v "${test_lock_filter_type}" "${result}")" != "0" ]; then
+               echo "[Fail] Recorded result should not have non-${test_lock_filter_type} locks:" "$(cat "${result}")"
                err=1
                exit
        fi
@@ -170,8 +170,8 @@ test_lock_filter()
        fi
 
        perf lock con -a -b -L tasklist_lock -q -- perf bench sched messaging > /dev/null 2> ${result}
-       if [ "$(grep -c -v "${type}" "${result}")" != "0" ]; then
-               echo "[Fail] BPF result should not have non-${type} locks:" "$(cat "${result}")"
+       if [ "$(grep -c -v "${test_lock_filter_type}" "${result}")" != "0" ]; then
+               echo "[Fail] BPF result should not have non-${test_lock_filter_type} locks:" "$(cat "${result}")"
                err=1
                exit
        fi
index 5d1b63d..871243d 100755 (executable)
@@ -4,11 +4,11 @@
 # SPDX-License-Identifier: GPL-2.0
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
-. $(dirname $0)/lib/probe.sh
+. "$(dirname "$0")"/lib/probe.sh
 
 skip_if_no_perf_probe || exit 2
 
-. $(dirname $0)/lib/probe_vfs_getname.sh
+. "$(dirname "$0")"/lib/probe_vfs_getname.sh
 
 add_probe_vfs_getname || skip_if_no_debuginfo
 err=$?
index 49bd875..8929046 100755 (executable)
@@ -13,25 +13,25 @@ skip_if_no_z_record() {
 collect_z_record() {
        echo "Collecting compressed record file:"
        [ "$(uname -m)" != s390x ] && gflag='-g'
-       $perf_tool record -o $trace_file $gflag -z -F 5000 -- \
+       $perf_tool record -o "$trace_file" $gflag -z -F 5000 -- \
                dd count=500 if=/dev/urandom of=/dev/null
 }
 
 check_compressed_stats() {
        echo "Checking compressed events stats:"
-       $perf_tool report -i $trace_file --header --stats | \
+       $perf_tool report -i "$trace_file" --header --stats | \
                grep -E "(# compressed : Zstd,)|(COMPRESSED events:)"
 }
 
 check_compressed_output() {
-       $perf_tool inject -i $trace_file -o $trace_file.decomp &&
-       $perf_tool report -i $trace_file --stdio -F comm,dso,sym | head -n -3 > $trace_file.comp.output &&
-       $perf_tool report -i $trace_file.decomp --stdio -F comm,dso,sym | head -n -3 > $trace_file.decomp.output &&
-       diff $trace_file.comp.output $trace_file.decomp.output
+       $perf_tool inject -i "$trace_file" -o "$trace_file.decomp" &&
+       $perf_tool report -i "$trace_file" --stdio -F comm,dso,sym | head -n -3 > "$trace_file.comp.output" &&
+       $perf_tool report -i "$trace_file.decomp" --stdio -F comm,dso,sym | head -n -3 > "$trace_file.decomp.output" &&
+       diff "$trace_file.comp.output" "$trace_file.decomp.output"
 }
 
 skip_if_no_z_record || exit 2
 collect_z_record && check_compressed_stats && check_compressed_output
 err=$?
-rm -f $trace_file*
+rm -f "$trace_file"*  # keep the glob outside the quotes so it expands to the .decomp/.output siblings
 exit $err
diff --git a/tools/perf/tests/shell/record_bpf_filter.sh b/tools/perf/tests/shell/record_bpf_filter.sh
new file mode 100755 (executable)
index 0000000..31c5939
--- /dev/null
@@ -0,0 +1,134 @@
+#!/bin/sh
+# perf record sample filtering (by BPF) tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+
+cleanup() {
+  rm -f "${perfdata}"
+  rm -f "${perfdata}".old
+  trap - EXIT TERM INT
+}
+
+trap_cleanup() {
+  cleanup
+  exit 1
+}
+trap trap_cleanup EXIT TERM INT
+
+test_bpf_filter_priv() {
+  echo "Checking BPF-filter privilege"
+
+  if [ "$(id -u)" != 0 ]
+  then
+    echo "bpf-filter test [Skipped permission]"
+    err=2
+    return
+  fi
+  if ! perf record -e task-clock --filter 'period > 1' \
+         -o /dev/null --quiet true 2>&1
+  then
+    echo "bpf-filter test [Skipped missing BPF support]"
+    err=2
+    return
+  fi
+}
+
+test_bpf_filter_basic() {
+  echo "Basic bpf-filter test"
+
+  if ! perf record -e task-clock -c 10000 --filter 'ip < 0xffffffff00000000' \
+         -o "${perfdata}" true 2> /dev/null
+  then
+    echo "Basic bpf-filter test [Failed record]"
+    err=1
+    return
+  fi
+  if perf script -i "${perfdata}" -F ip | grep 'ffffffff[0-9a-f]*'
+  then
+    if uname -r | grep -q ^6.2
+    then
+      echo "Basic bpf-filter test [Skipped unsupported kernel]"
+      err=2
+      return
+    fi
+    echo "Basic bpf-filter test [Failed invalid output]"
+    err=1
+    return
+  fi
+  echo "Basic bpf-filter test [Success]"
+}
+
+test_bpf_filter_fail() {
+  echo "Failing bpf-filter test"
+
+  # 'cpu' requires PERF_SAMPLE_CPU flag
+  if ! perf record -e task-clock --filter 'cpu > 0' \
+         -o /dev/null true 2>&1 | grep PERF_SAMPLE_CPU
+  then
+    echo "Failing bpf-filter test [Failed forbidden CPU]"
+    err=1
+    return
+  fi
+
+  if ! perf record --sample-cpu -e task-clock --filter 'cpu > 0' \
+         -o /dev/null true 2>/dev/null
+  then
+    echo "Failing bpf-filter test [Failed should succeed]"
+    err=1
+    return
+  fi
+
+  echo "Failing bpf-filter test [Success]"
+}
+
+test_bpf_filter_group() {
+  echo "Group bpf-filter test"
+
+  if ! perf record -e task-clock --filter 'period > 1000 || ip > 0' \
+         -o /dev/null true 2>/dev/null
+  then
+    echo "Group bpf-filter test [Failed should succeed]"
+    err=1
+    return
+  fi
+
+  if ! perf record -e task-clock --filter 'cpu > 0 || ip > 0' \
+         -o /dev/null true 2>&1 | grep PERF_SAMPLE_CPU
+  then
+    echo "Group bpf-filter test [Failed forbidden CPU]"
+    err=1
+    return
+  fi
+
+  if ! perf record -e task-clock --filter 'period > 0 || code_pgsz > 4096' \
+         -o /dev/null true 2>&1 | grep PERF_SAMPLE_CODE_PAGE_SIZE
+  then
+    echo "Group bpf-filter test [Failed forbidden CODE_PAGE_SIZE]"
+    err=1
+    return
+  fi
+
+  echo "Group bpf-filter test [Success]"
+}
+
+
+test_bpf_filter_priv
+
+if [ $err = 0 ]; then
+  test_bpf_filter_basic
+fi
+
+if [ $err = 0 ]; then
+  test_bpf_filter_fail
+fi
+
+if [ $err = 0 ]; then
+  test_bpf_filter_group
+fi
+
+cleanup
+exit $err
index f062ae9..a0d14cd 100755 (executable)
@@ -10,19 +10,19 @@ perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 cleanup() {
   rm -f ${perfdata}
   rm -f ${perfdata}.old
-  trap - exit term int
+  trap - EXIT TERM INT
 }
 
 trap_cleanup() {
   cleanup
   exit 1
 }
-trap trap_cleanup exit term int
+trap trap_cleanup EXIT TERM INT
 
 test_offcpu_priv() {
   echo "Checking off-cpu privilege"
 
-  if [ `id -u` != 0 ]
+  if [ "$(id -u)" != 0 ]
   then
     echo "off-cpu test [Skipped permission]"
     err=2
index 34a0701..d890eb2 100755 (executable)
@@ -6,7 +6,7 @@
 
 set -e
 
-. $(dirname $0)/lib/stat_output.sh
+. "$(dirname "$0")"/lib/stat_output.sh
 
 csv_sep=@
 
index 5571ff7..8bae9c8 100755 (executable)
@@ -10,7 +10,7 @@ set -e
 #
 perf stat -e cycles  -x' ' -I1000 --interval-count 1 --summary 2>&1 | \
 grep -e summary | \
-while read summary num event run pct
+while read summary _num _event _run _pct
 do
        if [ $summary != "summary" ]; then
                exit 1
@@ -23,7 +23,7 @@ done
 #
 perf stat -e cycles  -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \
 grep -e summary | \
-while read num event run pct
+while read _num _event _run _pct
 do
        exit 1
 done
index 0e9cba8..a1918a1 100755 (executable)
@@ -14,7 +14,7 @@ test_global_aggr()
 {
        perf stat -a --no-big-num -e cycles,instructions sleep 1  2>&1 | \
        grep -e cycles -e instructions | \
-       while read num evt hash ipc rest
+       while read num evt _hash ipc rest
        do
                # skip not counted events
                if [ "$num" = "<not" ]; then
@@ -45,7 +45,7 @@ test_no_aggr()
 {
        perf stat -a -A --no-big-num -e cycles,instructions sleep 1  2>&1 | \
        grep ^CPU | \
-       while read cpu num evt hash ipc rest
+       while read cpu num evt _hash ipc rest
        do
                # skip not counted events
                if [ "$num" = "<not" ]; then
index f972b31..fb2b105 100755 (executable)
@@ -6,7 +6,7 @@
 
 set -e
 
-. $(dirname $0)/lib/stat_output.sh
+. "$(dirname "$0")"/lib/stat_output.sh
 
 stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX)
 
@@ -28,7 +28,6 @@ trap trap_cleanup EXIT TERM INT
 
 function commachecker()
 {
-       local -i cnt=0
        local prefix=1
 
        case "$1"
index 13473ae..a87bb28 100755 (executable)
@@ -22,21 +22,21 @@ compare_number()
 }
 
 # skip if --bpf-counters is not supported
-if ! perf stat --bpf-counters true > /dev/null 2>&1; then
+if ! perf stat -e cycles --bpf-counters true > /dev/null 2>&1; then
        if [ "$1" = "-v" ]; then
                echo "Skipping: --bpf-counters not supported"
-               perf --no-pager stat --bpf-counters true || true
+               perf --no-pager stat -e cycles --bpf-counters true || true
        fi
        exit 2
 fi
 
 base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$base_cycles" == "<not" ]; then
+if [ "$base_cycles" = "<not" ]; then
        echo "Skipping: cycles event not counted"
        exit 2
 fi
 bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$bpf_cycles" == "<not" ]; then
+if [ "$bpf_cycles" = "<not" ]; then
        echo "Failed: cycles not counted with --bpf-counters"
        exit 1
 fi
index d724855..e75d078 100755 (executable)
@@ -25,22 +25,22 @@ check_bpf_counter()
 find_cgroups()
 {
        # try usual systemd slices first
-       if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then
+       if [ -d /sys/fs/cgroup/system.slice ] && [ -d /sys/fs/cgroup/user.slice ]; then
                test_cgroups="system.slice,user.slice"
                return
        fi
 
        # try root and self cgroups
-       local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
-       if [ -z ${self_cgrp} ]; then
+       find_cgroups_self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
+       if [ -z ${find_cgroups_self_cgrp} ]; then
                # cgroup v2 doesn't specify perf_event
-               self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
+               find_cgroups_self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
        fi
 
-       if [ -z ${self_cgrp} ]; then
+       if [ -z ${find_cgroups_self_cgrp} ]; then
                test_cgroups="/"
        else
-               test_cgroups="/,${self_cgrp}"
+               test_cgroups="/,${find_cgroups_self_cgrp}"
        fi
 }
 
@@ -48,13 +48,11 @@ find_cgroups()
 # Just check if it runs without failure and has non-zero results.
 check_system_wide_counted()
 {
-       local output
-
-       output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1  2>&1)
-       if echo ${output} | grep -q -F "<not "; then
+       check_system_wide_counted_output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1  2>&1)
+       if echo ${check_system_wide_counted_output} | grep -q -F "<not "; then
                echo "Some system-wide events are not counted"
                if [ "${verbose}" = "1" ]; then
-                       echo ${output}
+                       echo ${check_system_wide_counted_output}
                fi
                exit 1
        fi
@@ -62,13 +60,11 @@ check_system_wide_counted()
 
 check_cpu_list_counted()
 {
-       local output
-
-       output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
-       if echo ${output} | grep -q -F "<not "; then
+       check_cpu_list_counted_output=$(perf stat -C 0,1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
+       if echo ${check_cpu_list_counted_output} | grep -q -F "<not "; then
                echo "Some CPU events are not counted"
                if [ "${verbose}" = "1" ]; then
-                       echo ${output}
+                       echo ${check_cpu_list_counted_output}
                fi
                exit 1
        fi
index fad3616..1a7e6a8 100755 (executable)
@@ -22,7 +22,7 @@ cleanup_files()
        rm -f ${PERF_DATA}
 }
 
-trap cleanup_files exit term int
+trap cleanup_files EXIT TERM INT
 
 echo "Recording workload..."
 perf record -o ${PERF_DATA} -e arm_spe/period=65536/ -vvv -- $TEST_PROGRAM > ${PERF_RECORD_LOG} 2>&1 &
index 72ac6c8..6ded58f 100755 (executable)
@@ -39,7 +39,7 @@ test_json_converter_command()
        echo "Testing Perf Data Convertion Command to JSON"
        perf record -o "$perfdata" -F 99 -g -- perf test -w noploop > /dev/null 2>&1
        perf data convert --to-json "$result" --force -i "$perfdata" >/dev/null 2>&1
-       if [ $(cat "${result}" | wc -l) -gt "0" ] ; then
+       if [ "$(cat ${result} | wc -l)" -gt "0" ] ; then
                echo "Perf Data Converter Command to JSON [SUCCESS]"
        else
                echo "Perf Data Converter Command to JSON [FAILED]"
index 0095abb..92d1515 100755 (executable)
@@ -52,7 +52,7 @@ find_str_or_fail() {
 
 # check if perf is compiled with libtraceevent support
 skip_no_probe_record_support() {
-       perf record -e "sched:sched_switch" -a -- sleep 1 2>&1 | grep "libtraceevent is necessary for tracepoint support" && return 2
+       perf version --build-options | grep -q " OFF .* HAVE_LIBTRACEEVENT" && return 2
        return 0
 }
 
index 0a4bac3..4014487 100755 (executable)
 # SPDX-License-Identifier: GPL-2.0
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
-. $(dirname $0)/lib/probe.sh
+. "$(dirname "$0")"/lib/probe.sh
 
 skip_if_no_perf_probe || exit 2
 skip_if_no_perf_trace || exit 2
 
-. $(dirname $0)/lib/probe_vfs_getname.sh
+. "$(dirname "$0")"/lib/probe_vfs_getname.sh
 
 trace_open_vfs_getname() {
-       evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+       evts="$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')"
        perf trace -e $evts touch $file 2>&1 | \
-       grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
+       grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +\"?${file}\"?, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
 
 
index 5009740..706780f 100644 (file)
@@ -27,7 +27,7 @@ static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
                                     struct machine *machine __maybe_unused)
 {
        struct perf_record_stat_config *config = &event->stat_config;
-       struct perf_stat_config stat_config;
+       struct perf_stat_config stat_config = {};
 
 #define HAS(term, val) \
        has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
index f424c0b..f33cfc3 100644 (file)
@@ -113,7 +113,6 @@ DECLARE_SUITE(fdarray__filter);
 DECLARE_SUITE(fdarray__add);
 DECLARE_SUITE(kmod_path__parse);
 DECLARE_SUITE(thread_map);
-DECLARE_SUITE(llvm);
 DECLARE_SUITE(bpf);
 DECLARE_SUITE(session_topology);
 DECLARE_SUITE(thread_map_synthesize);
@@ -129,7 +128,6 @@ DECLARE_SUITE(sdt_event);
 DECLARE_SUITE(is_printable_array);
 DECLARE_SUITE(bitmap_print);
 DECLARE_SUITE(perf_hooks);
-DECLARE_SUITE(clang);
 DECLARE_SUITE(unit_number__scnprint);
 DECLARE_SUITE(mem2node);
 DECLARE_SUITE(maps__merge_in);
index 37c53ba..cc09dca 100755 (executable)
@@ -17,8 +17,7 @@ arch_string()
 
 asm_errno_file()
 {
-       local arch="$1"
-       local header
+       arch="$1"
 
        header="$toolsdir/arch/$arch/include/uapi/asm/errno.h"
        if test -r "$header"; then
@@ -30,8 +29,7 @@ asm_errno_file()
 
 create_errno_lookup_func()
 {
-       local arch=$(arch_string "$1")
-       local nr name
+       arch=$(arch_string "$1")
 
        printf "static const char *errno_to_name__%s(int err)\n{\n\tswitch (err) {\n" $arch
 
@@ -44,8 +42,8 @@ create_errno_lookup_func()
 
 process_arch()
 {
-       local arch="$1"
-       local asm_errno=$(asm_errno_file "$arch")
+       arch="$1"
+       asm_errno=$(asm_errno_file "$arch")
 
        $gcc $CFLAGS $include_path -E -dM -x c $asm_errno \
                |grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \
@@ -56,9 +54,8 @@ process_arch()
 
 create_arch_errno_table_func()
 {
-       local archlist="$1"
-       local default="$2"
-       local arch
+       archlist="$1"
+       default="$2"
 
        printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n'
        printf '{\n'
index 3d12bf0..788e8f6 100644 (file)
@@ -67,15 +67,14 @@ extern struct strarray strarray__socket_level;
 /**
  * augmented_arg: extra payload for syscall pointer arguments
  
- * If perf_sample->raw_size is more than what a syscall sys_enter_FOO puts,
- * then its the arguments contents, so that we can show more than just a
+ * If perf_sample->raw_size is more than what a syscall sys_enter_FOO puts, then
+ * it's the argument contents, so that we can show more than just a
  * pointer. This will be done initially with eBPF, the start of that is at the
- * tools/perf/examples/bpf/augmented_syscalls.c example for the openat, but
- * will eventually be done automagically caching the running kernel tracefs
- * events data into an eBPF C script, that then gets compiled and its .o file
- * cached for subsequent use. For char pointers like the ones for 'open' like
- * syscalls its easy, for the rest we should use DWARF or better, BTF, much
- * more compact.
+ * tools/perf/util/bpf_skel/augmented_syscalls.bpf.c that will eventually be
+ * done automagically caching the running kernel tracefs events data into an
+ * eBPF C script, that then gets compiled and its .o file cached for subsequent
+ * use. For char pointers like the ones for 'open' like syscalls it's easy, for
+ * the rest we should use DWARF or better, BTF, much more compact.
  *
  * @size: 8 if all we need is an integer, otherwise all of the augmented arg.
  * @int_arg: will be used for integer like pointer contents, like 'accept's 'upeer_addrlen'
index 3022597..6ecdb3c 100755 (executable)
@@ -19,6 +19,7 @@ arch_mman=${arch_header_dir}/mman.h
 
 printf "static const char *mmap_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+test -f ${arch_mman} && \
 grep -E -q $regex ${arch_mman} && \
 (grep -E $regex ${arch_mman} | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g"      | \
@@ -28,12 +29,14 @@ grep -E -q $regex ${linux_mman} && \
        grep -E -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
        xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-([ ! -f ${arch_mman} ] || grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
+( ! test -f ${arch_mman} || \
+grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
 (grep -E $regex ${header_dir}/mman-common.h | \
        grep -E -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g"      | \
        xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-([ ! -f ${arch_mman} ] || grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
+( ! test -f ${arch_mman} || \
+grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
 (grep -E $regex ${header_dir}/mman.h | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g"      | \
        xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
index 49e8c86..4436fcd 100755 (executable)
@@ -17,12 +17,13 @@ prefix="PROT"
 
 printf "static const char *mmap_prot[] = {\n"
 regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}`
-([ ! -f ${arch_mman} ] || grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
+( ! test -f ${arch_mman} \
+|| grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
 (grep -E $regex ${common_mman} | \
        grep -E -vw PROT_NONE | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g"      | \
        xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef ${prefix}_%s\n#define ${prefix}_%s %s\n#endif\n")
-[ -f ${arch_mman} ] && grep -E -q $regex ${arch_mman} &&
+test -f ${arch_mman} && grep -E -q $regex ${arch_mman} &&
 (grep -E $regex ${arch_mman} | \
        grep -E -vw PROT_NONE | \
        sed -r "s/$regex/\2 \1 \1 \1 \2/g"      | \
index fd5c740..b1596df 100755 (executable)
@@ -7,9 +7,9 @@
 prctl_arch_header=${x86_header_dir}/prctl.h
 
 print_range () {
-       local idx=$1
-       local prefix=$2
-       local first_entry=$3
+       idx=$1
+       prefix=$2
+       first_entry=$3
 
        printf "#define x86_arch_prctl_codes_%d_offset %s\n" $idx $first_entry
        printf "static const char *x86_arch_prctl_codes_%d[] = {\n" $idx
index 3aff83c..6b6d714 100644 (file)
@@ -10,5 +10,3 @@ CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))"
 perf-$(CONFIG_SLANG) += browser.o
 perf-$(CONFIG_SLANG) += browsers/
 perf-$(CONFIG_SLANG) += tui/
-
-CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST
index 78fb01d..603d112 100644 (file)
@@ -57,12 +57,12 @@ void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
 void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg,
                               unsigned int width)
 {
-       slsmg_write_nstring(msg, width);
+       SLsmg_write_nstring(msg, width);
 }
 
 void ui_browser__vprintf(struct ui_browser *browser __maybe_unused, const char *fmt, va_list args)
 {
-       slsmg_vprintf(fmt, args);
+       SLsmg_vprintf(fmt, args);
 }
 
 void ui_browser__printf(struct ui_browser *browser __maybe_unused, const char *fmt, ...)
@@ -808,6 +808,6 @@ void ui_browser__init(void)
 
        while (ui_browser__colorsets[i].name) {
                struct ui_browser_colorset *c = &ui_browser__colorsets[i++];
-               sltt_set_color(c->colorset, c->name, c->fg, c->bg);
+               SLtt_set_color(c->colorset, c->name, c->fg, c->bg);
        }
 }
index fdf86f7..7a1d5dd 100644 (file)
@@ -4,8 +4,3 @@ perf-y += map.o
 perf-y += scripts.o
 perf-y += header.o
 perf-y += res_sample.o
-
-CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST
-CFLAGS_hists.o    += -DENABLE_SLFUTURE_CONST
-CFLAGS_map.o      += -DENABLE_SLFUTURE_CONST
-CFLAGS_scripts.o  += -DENABLE_SLFUTURE_CONST
index c7ad9e0..70db5a7 100644 (file)
@@ -407,11 +407,6 @@ static bool hist_browser__selection_has_children(struct hist_browser *browser)
        return container_of(ms, struct callchain_list, ms)->has_children;
 }
 
-static bool hist_browser__he_selection_unfolded(struct hist_browser *browser)
-{
-       return browser->he_selection ? browser->he_selection->unfolded : false;
-}
-
 static bool hist_browser__selection_unfolded(struct hist_browser *browser)
 {
        struct hist_entry *he = browser->he_selection;
@@ -584,8 +579,8 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
        return n;
 }
 
-static void __hist_entry__set_folding(struct hist_entry *he,
-                                     struct hist_browser *hb, bool unfold)
+static void hist_entry__set_folding(struct hist_entry *he,
+                                   struct hist_browser *hb, bool unfold)
 {
        hist_entry__init_have_children(he);
        he->unfolded = unfold ? he->has_children : false;
@@ -603,34 +598,12 @@ static void __hist_entry__set_folding(struct hist_entry *he,
                he->nr_rows = 0;
 }
 
-static void hist_entry__set_folding(struct hist_entry *he,
-                                   struct hist_browser *browser, bool unfold)
-{
-       double percent;
-
-       percent = hist_entry__get_percent_limit(he);
-       if (he->filtered || percent < browser->min_pcnt)
-               return;
-
-       __hist_entry__set_folding(he, browser, unfold);
-
-       if (!he->depth || unfold)
-               browser->nr_hierarchy_entries++;
-       if (he->leaf)
-               browser->nr_callchain_rows += he->nr_rows;
-       else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
-               browser->nr_hierarchy_entries++;
-               he->has_no_entry = true;
-               he->nr_rows = 1;
-       } else
-               he->has_no_entry = false;
-}
-
 static void
 __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
        struct rb_node *nd;
        struct hist_entry *he;
+       double percent;
 
        nd = rb_first_cached(&browser->hists->entries);
        while (nd) {
@@ -640,6 +613,21 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
                nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
 
                hist_entry__set_folding(he, browser, unfold);
+
+               percent = hist_entry__get_percent_limit(he);
+               if (he->filtered || percent < browser->min_pcnt)
+                       continue;
+
+               if (!he->depth || unfold)
+                       browser->nr_hierarchy_entries++;
+               if (he->leaf)
+                       browser->nr_callchain_rows += he->nr_rows;
+               else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+                       browser->nr_hierarchy_entries++;
+                       he->has_no_entry = true;
+                       he->nr_rows = 1;
+               } else
+                       he->has_no_entry = false;
        }
 }
 
@@ -659,8 +647,10 @@ static void hist_browser__set_folding_selected(struct hist_browser *browser, boo
        if (!browser->he_selection)
                return;
 
-       hist_entry__set_folding(browser->he_selection, browser, unfold);
-       browser->b.nr_entries = hist_browser__nr_entries(browser);
+       if (unfold == browser->he_selection->unfolded)
+               return;
+
+       hist_browser__toggle_fold(browser);
 }
 
 static void ui_browser__warn_lost_events(struct ui_browser *browser)
@@ -732,8 +722,8 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l
                hist_browser__set_folding(browser, true);
                break;
        case 'e':
-               /* Expand the selected entry. */
-               hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser));
+               /* Toggle expand/collapse the selected entry. */
+               hist_browser__toggle_fold(browser);
                break;
        case 'H':
                browser->show_headers = !browser->show_headers;
@@ -1779,7 +1769,7 @@ static void hists_browser__hierarchy_headers(struct hist_browser *browser)
        hists_browser__scnprintf_hierarchy_headers(browser, headers,
                                                   sizeof(headers));
 
-       ui_browser__gotorc(&browser->b, 0, 0);
+       ui_browser__gotorc_title(&browser->b, 0, 0);
        ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
        ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
 }
index 991e692..1dff302 100644 (file)
 #define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
 #endif
 
+/* Enable future slang's corrected function prototypes. */
+#define ENABLE_SLFUTURE_CONST 1
+#define ENABLE_SLFUTURE_VOID 1
+
 #ifdef HAVE_SLANG_INCLUDE_SUBDIR
 #include <slang/slang.h>
 #else
 #include <slang.h>
 #endif
 
-#if SLANG_VERSION < 20104
-#define slsmg_printf(msg, args...) \
-       SLsmg_printf((char *)(msg), ##args)
-#define slsmg_vprintf(msg, vargs) \
-       SLsmg_vprintf((char *)(msg), vargs)
-#define slsmg_write_nstring(msg, len) \
-       SLsmg_write_nstring((char *)(msg), len)
-#define sltt_set_color(obj, name, fg, bg) \
-       SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
-#else
-#define slsmg_printf SLsmg_printf
-#define slsmg_vprintf SLsmg_vprintf
-#define slsmg_write_nstring SLsmg_write_nstring
-#define sltt_set_color SLtt_set_color
-#endif
-
 #define SL_KEY_UNTAB 0x1000
 
 #endif /* _PERF_UI_SLANG_H_ */
index db4952f..b394513 100644 (file)
@@ -22,7 +22,7 @@ static void tui_helpline__push(const char *msg)
 
        SLsmg_gotorc(SLtt_Screen_Rows - 1, 0);
        SLsmg_set_color(0);
-       SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols);
+       SLsmg_write_nstring(msg, SLtt_Screen_Cols);
        SLsmg_refresh();
        strlcpy(ui_helpline__current, msg, sz);
 }
index c1886aa..605d9e1 100644 (file)
@@ -142,7 +142,7 @@ int ui__init(void)
                goto out;
        }
 
-       SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB);
+       SLkp_define_keysym("^(kB)", SL_KEY_UNTAB);
 
        signal(SIGSEGV, ui__signal_backtrace);
        signal(SIGFPE, ui__signal_backtrace);
index 3c51748..e4d322c 100644 (file)
@@ -106,7 +106,7 @@ int ui_browser__input_window(const char *title, const char *text, char *input,
        SLsmg_draw_box(y, x++, nr_lines, max_len);
        if (title) {
                SLsmg_gotorc(y, x + 1);
-               SLsmg_write_string((char *)title);
+               SLsmg_write_string(title);
        }
        SLsmg_gotorc(++y, x);
        nr_lines -= 7;
@@ -117,12 +117,12 @@ int ui_browser__input_window(const char *title, const char *text, char *input,
        len = 5;
        while (len--) {
                SLsmg_gotorc(y + len - 1, x);
-               SLsmg_write_nstring((char *)" ", max_len);
+               SLsmg_write_nstring(" ", max_len);
        }
        SLsmg_draw_box(y++, x + 1, 3, max_len - 2);
 
        SLsmg_gotorc(y + 3, x);
-       SLsmg_write_nstring((char *)exit_msg, max_len);
+       SLsmg_write_nstring(exit_msg, max_len);
        SLsmg_refresh();
 
        mutex_unlock(&ui__lock);
@@ -197,7 +197,7 @@ void __ui__info_window(const char *title, const char *text, const char *exit_msg
        SLsmg_draw_box(y, x++, nr_lines, max_len);
        if (title) {
                SLsmg_gotorc(y, x + 1);
-               SLsmg_write_string((char *)title);
+               SLsmg_write_string(title);
        }
        SLsmg_gotorc(++y, x);
        if (exit_msg)
@@ -207,9 +207,9 @@ void __ui__info_window(const char *title, const char *text, const char *exit_msg
                                   nr_lines, max_len, 1);
        if (exit_msg) {
                SLsmg_gotorc(y + nr_lines - 2, x);
-               SLsmg_write_nstring((char *)" ", max_len);
+               SLsmg_write_nstring(" ", max_len);
                SLsmg_gotorc(y + nr_lines - 1, x);
-               SLsmg_write_nstring((char *)exit_msg, max_len);
+               SLsmg_write_nstring(exit_msg, max_len);
        }
 }
 
index 96f4ea1..6d657c9 100644 (file)
@@ -1,3 +1,6 @@
+include $(srctree)/tools/scripts/Makefile.include
+include $(srctree)/tools/scripts/utilities.mak
+
 perf-y += arm64-frame-pointer-unwind-support.o
 perf-y += addr_location.o
 perf-y += annotate.o
@@ -20,13 +23,13 @@ perf-y += evswitch.o
 perf-y += find_bit.o
 perf-y += get_current_dir_name.o
 perf-y += levenshtein.o
-perf-y += llvm-utils.o
 perf-y += mmap.o
 perf-y += memswap.o
 perf-y += parse-events.o
 perf-y += print-events.o
 perf-y += tracepoint.o
 perf-y += perf_regs.o
+perf-y += perf-regs-arch/
 perf-y += path.o
 perf-y += print_binary.o
 perf-y += rlimit.o
@@ -147,7 +150,6 @@ perf-y += list_sort.o
 perf-y += mutex.o
 perf-y += sharded_mutex.o
 
-perf-$(CONFIG_LIBBPF) += bpf-loader.o
 perf-$(CONFIG_LIBBPF) += bpf_map.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
@@ -165,7 +167,6 @@ ifeq ($(CONFIG_LIBTRACEEVENT),y)
   perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
 endif
 
-perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
 perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
 perf-$(CONFIG_LIBELF) += probe-event.o
@@ -229,12 +230,9 @@ perf-y += perf-hooks.o
 perf-$(CONFIG_LIBBPF) += bpf-event.o
 perf-$(CONFIG_LIBBPF) += bpf-utils.o
 
-perf-$(CONFIG_CXX) += c++/
-
 perf-$(CONFIG_LIBPFM4) += pfm.o
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
-CFLAGS_llvm-utils.o += -DLIBBPF_INCLUDE_DIR="BUILD_STR($(libbpf_include_dir_SQ))"
 
 # avoid compiler warnings in 32-bit mode
 CFLAGS_genelf_debug.o  += -Wno-packed
@@ -246,7 +244,7 @@ $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-
 
 $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
        $(call rule_mkdir)
-       $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
+       $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) $(BISON_FALLBACK_FLAGS) \
                -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
 
 $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
@@ -279,28 +277,58 @@ $(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filt
        $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
                -o $(OUTPUT)util/bpf-filter-bison.c -p perf_bpf_filter_
 
-FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e  's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26)
-ifeq ($(FLEX_GE_26),1)
-  flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations
-  CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?)
-  ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1)
-    flex_flags += -Wno-misleading-indentation
+FLEX_VERSION := $(shell $(FLEX) --version | cut -d' ' -f2)
+
+FLEX_GE_260 := $(call version-ge3,$(FLEX_VERSION),2.6.0)
+ifeq ($(FLEX_GE_260),1)
+  flex_flags := -Wno-redundant-decls -Wno-switch-default -Wno-unused-function -Wno-misleading-indentation
+
+  # Some newer clang and gcc version complain about this
+  # util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable]
+  #  int yynerrs = 0;
+
+  flex_flags += -Wno-unused-but-set-variable
+
+  FLEX_LT_262 := $(call version-lt3,$(FLEX_VERSION),2.6.2)
+  ifeq ($(FLEX_LT_262),1)
+    flex_flags += -Wno-sign-compare
   endif
 else
   flex_flags := -w
 endif
-CFLAGS_parse-events-flex.o  += $(flex_flags)
-CFLAGS_pmu-flex.o           += $(flex_flags)
-CFLAGS_expr-flex.o          += $(flex_flags)
-CFLAGS_bpf-filter-flex.o    += $(flex_flags)
 
-bison_flags := -DYYENABLE_NLS=0
-BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
-ifeq ($(BISON_GE_35),1)
-  bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option
+# Some newer clang and gcc version complain about this
+# util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable]
+#  int yynerrs = 0;
+
+bison_flags := -DYYENABLE_NLS=0 -Wno-unused-but-set-variable
+
+# Old clangs don't grok -Wno-unused-but-set-variable, remove it
+ifeq ($(CC_NO_CLANG), 0)
+  CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g')
+  ifeq ($(call version-lt3,$(CLANG_VERSION),13.0.0),1)
+    bison_flags := $(subst -Wno-unused-but-set-variable,,$(bison_flags))
+    flex_flags := $(subst -Wno-unused-but-set-variable,,$(flex_flags))
+  endif
+endif
+
+BISON_GE_382 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 382)
+ifeq ($(BISON_GE_382),1)
+  bison_flags += -Wno-switch-enum
 else
   bison_flags += -w
 endif
+
+BISON_LT_381 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 381)
+ifeq ($(BISON_LT_381),1)
+  bison_flags += -DYYNOMEM=YYABORT
+endif
+
+CFLAGS_parse-events-flex.o  += $(flex_flags) -Wno-unused-label
+CFLAGS_pmu-flex.o           += $(flex_flags)
+CFLAGS_expr-flex.o          += $(flex_flags)
+CFLAGS_bpf-filter-flex.o    += $(flex_flags)
+
 CFLAGS_parse-events-bison.o += $(bison_flags)
 CFLAGS_pmu-bison.o          += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
 CFLAGS_expr-bison.o         += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
@@ -316,8 +344,6 @@ CFLAGS_find_bit.o      += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
 CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_libstring.o     += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
-CFLAGS_parse-events.o  += -Wno-redundant-decls
-CFLAGS_expr.o          += -Wno-redundant-decls
 CFLAGS_header.o        += -include $(OUTPUT)PERF-VERSION-FILE
 CFLAGS_arm-spe.o       += -I$(srctree)/tools/arch/arm64/include/
 
index 6a6ddba..9d0ce88 100644 (file)
@@ -15,7 +15,6 @@
 #include "session.h"
 #include "evlist.h"
 #include "sample-raw.h"
-#include "pmu-events/pmu-events.h"
 #include "util/sample.h"
 
 static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
index ba988a1..82956ad 100644 (file)
@@ -1846,8 +1846,11 @@ static int symbol__disassemble_bpf(struct symbol *sym,
        perf_exe(tpath, sizeof(tpath));
 
        bfdf = bfd_openr(tpath, NULL);
-       assert(bfdf);
-       assert(bfd_check_format(bfdf, bfd_object));
+       if (bfdf == NULL)
+               abort();
+
+       if (!bfd_check_format(bfdf, bfd_object))
+               abort();
 
        s = open_memstream(&buf, &buf_size);
        if (!s) {
@@ -1895,7 +1898,8 @@ static int symbol__disassemble_bpf(struct symbol *sym,
 #else
        disassemble = disassembler(bfdf);
 #endif
-       assert(disassemble);
+       if (disassemble == NULL)
+               abort();
 
        fflush(s);
        do {
index 0b30688..b515449 100644 (file)
@@ -9,8 +9,8 @@
 #include "util/evsel.h"
 
 #include "util/bpf-filter.h"
-#include "util/bpf-filter-flex.h"
-#include "util/bpf-filter-bison.h"
+#include <util/bpf-filter-flex.h>
+#include <util/bpf-filter-bison.h>
 
 #include "bpf_skel/sample-filter.h"
 #include "bpf_skel/sample_filter.skel.h"
@@ -62,6 +62,16 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr *
        if (evsel->core.attr.sample_type & expr->sample_flags)
                return 0;
 
+       if (expr->op == PBF_OP_GROUP_BEGIN) {
+               struct perf_bpf_filter_expr *group;
+
+               list_for_each_entry(group, &expr->groups, list) {
+                       if (check_sample_flags(evsel, group) < 0)
+                               return -1;
+               }
+               return 0;
+       }
+
        info = get_sample_info(expr->sample_flags);
        if (info == NULL) {
                pr_err("Error: %s event does not have sample flags %lx\n",
index 07d6c79..5dfa948 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/list.h>
 #include "bpf-filter.h"
 
+int perf_bpf_filter_lex(void);
+
 static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
                                  char const *msg)
 {
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
deleted file mode 100644 (file)
index 44cde27..0000000
+++ /dev/null
@@ -1,2110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * bpf-loader.c
- *
- * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2015 Huawei Inc.
- */
-
-#include <linux/bpf.h>
-#include <bpf/libbpf.h>
-#include <bpf/bpf.h>
-#include <linux/filter.h>
-#include <linux/err.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/zalloc.h>
-#include <errno.h>
-#include <stdlib.h>
-#include "debug.h"
-#include "evlist.h"
-#include "bpf-loader.h"
-#include "bpf-prologue.h"
-#include "probe-event.h"
-#include "probe-finder.h" // for MAX_PROBES
-#include "parse-events.h"
-#include "strfilter.h"
-#include "util.h"
-#include "llvm-utils.h"
-#include "c++/clang-c.h"
-#include "util/hashmap.h"
-#include "asm/bug.h"
-
-#include <internal/xyarray.h>
-
-/* temporarily disable libbpf deprecation warnings */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)),
-                             const char *fmt, va_list args)
-{
-       return veprintf(1, verbose, pr_fmt(fmt), args);
-}
-
-struct bpf_prog_priv {
-       bool is_tp;
-       char *sys_name;
-       char *evt_name;
-       struct perf_probe_event pev;
-       bool need_prologue;
-       struct bpf_insn *insns_buf;
-       int nr_types;
-       int *type_mapping;
-       int *prologue_fds;
-};
-
-struct bpf_perf_object {
-       struct list_head list;
-       struct bpf_object *obj;
-};
-
-struct bpf_preproc_result {
-       struct bpf_insn *new_insn_ptr;
-       int new_insn_cnt;
-};
-
-static LIST_HEAD(bpf_objects_list);
-static struct hashmap *bpf_program_hash;
-static struct hashmap *bpf_map_hash;
-
-static struct bpf_perf_object *
-bpf_perf_object__next(struct bpf_perf_object *prev)
-{
-       if (!prev) {
-               if (list_empty(&bpf_objects_list))
-                       return NULL;
-
-               return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list);
-       }
-       if (list_is_last(&prev->list, &bpf_objects_list))
-               return NULL;
-
-       return list_next_entry(prev, list);
-}
-
-#define bpf_perf_object__for_each(perf_obj, tmp)       \
-       for ((perf_obj) = bpf_perf_object__next(NULL),  \
-            (tmp) = bpf_perf_object__next(perf_obj);   \
-            (perf_obj) != NULL;                        \
-            (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp))
-
-static bool libbpf_initialized;
-static int libbpf_sec_handler;
-
-static int bpf_perf_object__add(struct bpf_object *obj)
-{
-       struct bpf_perf_object *perf_obj = zalloc(sizeof(*perf_obj));
-
-       if (perf_obj) {
-               INIT_LIST_HEAD(&perf_obj->list);
-               perf_obj->obj = obj;
-               list_add_tail(&perf_obj->list, &bpf_objects_list);
-       }
-       return perf_obj ? 0 : -ENOMEM;
-}
-
-static void *program_priv(const struct bpf_program *prog)
-{
-       void *priv;
-
-       if (IS_ERR_OR_NULL(bpf_program_hash))
-               return NULL;
-       if (!hashmap__find(bpf_program_hash, prog, &priv))
-               return NULL;
-       return priv;
-}
-
-static struct bpf_insn prologue_init_insn[] = {
-       BPF_MOV64_IMM(BPF_REG_2, 0),
-       BPF_MOV64_IMM(BPF_REG_3, 0),
-       BPF_MOV64_IMM(BPF_REG_4, 0),
-       BPF_MOV64_IMM(BPF_REG_5, 0),
-};
-
-static int libbpf_prog_prepare_load_fn(struct bpf_program *prog,
-                                      struct bpf_prog_load_opts *opts __maybe_unused,
-                                      long cookie __maybe_unused)
-{
-       size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn);
-       size_t orig_insn_cnt, insn_cnt, init_size, orig_size;
-       struct bpf_prog_priv *priv = program_priv(prog);
-       const struct bpf_insn *orig_insn;
-       struct bpf_insn *insn;
-
-       if (IS_ERR_OR_NULL(priv)) {
-               pr_debug("bpf: failed to get private field\n");
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-
-       if (!priv->need_prologue)
-               return 0;
-
-       /* prepend initialization code to program instructions */
-       orig_insn = bpf_program__insns(prog);
-       orig_insn_cnt = bpf_program__insn_cnt(prog);
-       init_size = init_size_cnt * sizeof(*insn);
-       orig_size = orig_insn_cnt * sizeof(*insn);
-
-       insn_cnt = orig_insn_cnt + init_size_cnt;
-       insn = malloc(insn_cnt * sizeof(*insn));
-       if (!insn)
-               return -ENOMEM;
-
-       memcpy(insn, prologue_init_insn, init_size);
-       memcpy((char *) insn + init_size, orig_insn, orig_size);
-       bpf_program__set_insns(prog, insn, insn_cnt);
-       return 0;
-}
-
-static int libbpf_init(void)
-{
-       LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts,
-               .prog_prepare_load_fn = libbpf_prog_prepare_load_fn,
-       );
-
-       if (libbpf_initialized)
-               return 0;
-
-       libbpf_set_print(libbpf_perf_print);
-       libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE,
-                                                         0, &handler_opts);
-       if (libbpf_sec_handler < 0) {
-               pr_debug("bpf: failed to register libbpf section handler: %d\n",
-                        libbpf_sec_handler);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-       libbpf_initialized = true;
-       return 0;
-}
-
-struct bpf_object *
-bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
-{
-       LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name);
-       struct bpf_object *obj;
-       int err;
-
-       err = libbpf_init();
-       if (err)
-               return ERR_PTR(err);
-
-       obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
-       if (IS_ERR_OR_NULL(obj)) {
-               pr_debug("bpf: failed to load buffer\n");
-               return ERR_PTR(-EINVAL);
-       }
-
-       if (bpf_perf_object__add(obj)) {
-               bpf_object__close(obj);
-               return ERR_PTR(-ENOMEM);
-       }
-
-       return obj;
-}
-
-static void bpf_perf_object__close(struct bpf_perf_object *perf_obj)
-{
-       list_del(&perf_obj->list);
-       bpf_object__close(perf_obj->obj);
-       free(perf_obj);
-}
-
-struct bpf_object *bpf__prepare_load(const char *filename, bool source)
-{
-       LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename);
-       struct bpf_object *obj;
-       int err;
-
-       err = libbpf_init();
-       if (err)
-               return ERR_PTR(err);
-
-       if (source) {
-               void *obj_buf;
-               size_t obj_buf_sz;
-
-               perf_clang__init();
-               err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz);
-               perf_clang__cleanup();
-               if (err) {
-                       pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err);
-                       err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz);
-                       if (err)
-                               return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
-               } else
-                       pr_debug("bpf: successful builtin compilation\n");
-               obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
-
-               if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
-                       llvm__dump_obj(filename, obj_buf, obj_buf_sz);
-
-               free(obj_buf);
-       } else {
-               obj = bpf_object__open(filename);
-       }
-
-       if (IS_ERR_OR_NULL(obj)) {
-               pr_debug("bpf: failed to load %s\n", filename);
-               return obj;
-       }
-
-       if (bpf_perf_object__add(obj)) {
-               bpf_object__close(obj);
-               return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
-       }
-
-       return obj;
-}
-
-static void close_prologue_programs(struct bpf_prog_priv *priv)
-{
-       struct perf_probe_event *pev;
-       int i, fd;
-
-       if (!priv->need_prologue)
-               return;
-       pev = &priv->pev;
-       for (i = 0; i < pev->ntevs; i++) {
-               fd = priv->prologue_fds[i];
-               if (fd != -1)
-                       close(fd);
-       }
-}
-
-static void
-clear_prog_priv(const struct bpf_program *prog __maybe_unused,
-               void *_priv)
-{
-       struct bpf_prog_priv *priv = _priv;
-
-       close_prologue_programs(priv);
-       cleanup_perf_probe_events(&priv->pev, 1);
-       zfree(&priv->insns_buf);
-       zfree(&priv->prologue_fds);
-       zfree(&priv->type_mapping);
-       zfree(&priv->sys_name);
-       zfree(&priv->evt_name);
-       free(priv);
-}
-
-static void bpf_program_hash_free(void)
-{
-       struct hashmap_entry *cur;
-       size_t bkt;
-
-       if (IS_ERR_OR_NULL(bpf_program_hash))
-               return;
-
-       hashmap__for_each_entry(bpf_program_hash, cur, bkt)
-               clear_prog_priv(cur->pkey, cur->pvalue);
-
-       hashmap__free(bpf_program_hash);
-       bpf_program_hash = NULL;
-}
-
-static void bpf_map_hash_free(void);
-
-void bpf__clear(void)
-{
-       struct bpf_perf_object *perf_obj, *tmp;
-
-       bpf_perf_object__for_each(perf_obj, tmp) {
-               bpf__unprobe(perf_obj->obj);
-               bpf_perf_object__close(perf_obj);
-       }
-
-       bpf_program_hash_free();
-       bpf_map_hash_free();
-}
-
-static size_t ptr_hash(const long __key, void *ctx __maybe_unused)
-{
-       return __key;
-}
-
-static bool ptr_equal(long key1, long key2, void *ctx __maybe_unused)
-{
-       return key1 == key2;
-}
-
-static int program_set_priv(struct bpf_program *prog, void *priv)
-{
-       void *old_priv;
-
-       /*
-        * Should not happen, we warn about it in the
-        * caller function - config_bpf_program
-        */
-       if (IS_ERR(bpf_program_hash))
-               return PTR_ERR(bpf_program_hash);
-
-       if (!bpf_program_hash) {
-               bpf_program_hash = hashmap__new(ptr_hash, ptr_equal, NULL);
-               if (IS_ERR(bpf_program_hash))
-                       return PTR_ERR(bpf_program_hash);
-       }
-
-       old_priv = program_priv(prog);
-       if (old_priv) {
-               clear_prog_priv(prog, old_priv);
-               return hashmap__set(bpf_program_hash, prog, priv, NULL, NULL);
-       }
-       return hashmap__add(bpf_program_hash, prog, priv);
-}
-
-static int
-prog_config__exec(const char *value, struct perf_probe_event *pev)
-{
-       pev->uprobes = true;
-       pev->target = strdup(value);
-       if (!pev->target)
-               return -ENOMEM;
-       return 0;
-}
-
-static int
-prog_config__module(const char *value, struct perf_probe_event *pev)
-{
-       pev->uprobes = false;
-       pev->target = strdup(value);
-       if (!pev->target)
-               return -ENOMEM;
-       return 0;
-}
-
-static int
-prog_config__bool(const char *value, bool *pbool, bool invert)
-{
-       int err;
-       bool bool_value;
-
-       if (!pbool)
-               return -EINVAL;
-
-       err = strtobool(value, &bool_value);
-       if (err)
-               return err;
-
-       *pbool = invert ? !bool_value : bool_value;
-       return 0;
-}
-
-static int
-prog_config__inlines(const char *value,
-                    struct perf_probe_event *pev __maybe_unused)
-{
-       return prog_config__bool(value, &probe_conf.no_inlines, true);
-}
-
-static int
-prog_config__force(const char *value,
-                  struct perf_probe_event *pev __maybe_unused)
-{
-       return prog_config__bool(value, &probe_conf.force_add, false);
-}
-
-static struct {
-       const char *key;
-       const char *usage;
-       const char *desc;
-       int (*func)(const char *, struct perf_probe_event *);
-} bpf_prog_config_terms[] = {
-       {
-               .key    = "exec",
-               .usage  = "exec=<full path of file>",
-               .desc   = "Set uprobe target",
-               .func   = prog_config__exec,
-       },
-       {
-               .key    = "module",
-               .usage  = "module=<module name>    ",
-               .desc   = "Set kprobe module",
-               .func   = prog_config__module,
-       },
-       {
-               .key    = "inlines",
-               .usage  = "inlines=[yes|no]        ",
-               .desc   = "Probe at inline symbol",
-               .func   = prog_config__inlines,
-       },
-       {
-               .key    = "force",
-               .usage  = "force=[yes|no]          ",
-               .desc   = "Forcibly add events with existing name",
-               .func   = prog_config__force,
-       },
-};
-
-static int
-do_prog_config(const char *key, const char *value,
-              struct perf_probe_event *pev)
-{
-       unsigned int i;
-
-       pr_debug("config bpf program: %s=%s\n", key, value);
-       for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
-               if (strcmp(key, bpf_prog_config_terms[i].key) == 0)
-                       return bpf_prog_config_terms[i].func(value, pev);
-
-       pr_debug("BPF: ERROR: invalid program config option: %s=%s\n",
-                key, value);
-
-       pr_debug("\nHint: Valid options are:\n");
-       for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
-               pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage,
-                        bpf_prog_config_terms[i].desc);
-       pr_debug("\n");
-
-       return -BPF_LOADER_ERRNO__PROGCONF_TERM;
-}
-
-static const char *
-parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev)
-{
-       char *text = strdup(config_str);
-       char *sep, *line;
-       const char *main_str = NULL;
-       int err = 0;
-
-       if (!text) {
-               pr_debug("Not enough memory: dup config_str failed\n");
-               return ERR_PTR(-ENOMEM);
-       }
-
-       line = text;
-       while ((sep = strchr(line, ';'))) {
-               char *equ;
-
-               *sep = '\0';
-               equ = strchr(line, '=');
-               if (!equ) {
-                       pr_warning("WARNING: invalid config in BPF object: %s\n",
-                                  line);
-                       pr_warning("\tShould be 'key=value'.\n");
-                       goto nextline;
-               }
-               *equ = '\0';
-
-               err = do_prog_config(line, equ + 1, pev);
-               if (err)
-                       break;
-nextline:
-               line = sep + 1;
-       }
-
-       if (!err)
-               main_str = config_str + (line - text);
-       free(text);
-
-       return err ? ERR_PTR(err) : main_str;
-}
-
-static int
-parse_prog_config(const char *config_str, const char **p_main_str,
-                 bool *is_tp, struct perf_probe_event *pev)
-{
-       int err;
-       const char *main_str = parse_prog_config_kvpair(config_str, pev);
-
-       if (IS_ERR(main_str))
-               return PTR_ERR(main_str);
-
-       *p_main_str = main_str;
-       if (!strchr(main_str, '=')) {
-               /* Is a tracepoint event? */
-               const char *s = strchr(main_str, ':');
-
-               if (!s) {
-                       pr_debug("bpf: '%s' is not a valid tracepoint\n",
-                                config_str);
-                       return -BPF_LOADER_ERRNO__CONFIG;
-               }
-
-               *is_tp = true;
-               return 0;
-       }
-
-       *is_tp = false;
-       err = parse_perf_probe_command(main_str, pev);
-       if (err < 0) {
-               pr_debug("bpf: '%s' is not a valid config string\n",
-                        config_str);
-               /* parse failed, don't need clear pev. */
-               return -BPF_LOADER_ERRNO__CONFIG;
-       }
-       return 0;
-}
-
-static int
-config_bpf_program(struct bpf_program *prog)
-{
-       struct perf_probe_event *pev = NULL;
-       struct bpf_prog_priv *priv = NULL;
-       const char *config_str, *main_str;
-       bool is_tp = false;
-       int err;
-
-       /* Initialize per-program probing setting */
-       probe_conf.no_inlines = false;
-       probe_conf.force_add = false;
-
-       priv = calloc(sizeof(*priv), 1);
-       if (!priv) {
-               pr_debug("bpf: failed to alloc priv\n");
-               return -ENOMEM;
-       }
-       pev = &priv->pev;
-
-       config_str = bpf_program__section_name(prog);
-       pr_debug("bpf: config program '%s'\n", config_str);
-       err = parse_prog_config(config_str, &main_str, &is_tp, pev);
-       if (err)
-               goto errout;
-
-       if (is_tp) {
-               char *s = strchr(main_str, ':');
-
-               priv->is_tp = true;
-               priv->sys_name = strndup(main_str, s - main_str);
-               priv->evt_name = strdup(s + 1);
-               goto set_priv;
-       }
-
-       if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
-               pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
-                        config_str, PERF_BPF_PROBE_GROUP);
-               err = -BPF_LOADER_ERRNO__GROUP;
-               goto errout;
-       } else if (!pev->group)
-               pev->group = strdup(PERF_BPF_PROBE_GROUP);
-
-       if (!pev->group) {
-               pr_debug("bpf: strdup failed\n");
-               err = -ENOMEM;
-               goto errout;
-       }
-
-       if (!pev->event) {
-               pr_debug("bpf: '%s': event name is missing. Section name should be 'key=value'\n",
-                        config_str);
-               err = -BPF_LOADER_ERRNO__EVENTNAME;
-               goto errout;
-       }
-       pr_debug("bpf: config '%s' is ok\n", config_str);
-
-set_priv:
-       err = program_set_priv(prog, priv);
-       if (err) {
-               pr_debug("Failed to set priv for program '%s'\n", config_str);
-               goto errout;
-       }
-
-       return 0;
-
-errout:
-       if (pev)
-               clear_perf_probe_event(pev);
-       free(priv);
-       return err;
-}
-
-static int bpf__prepare_probe(void)
-{
-       static int err = 0;
-       static bool initialized = false;
-
-       /*
-        * Make err static, so if init failed the first, bpf__prepare_probe()
-        * fails each time without calling init_probe_symbol_maps multiple
-        * times.
-        */
-       if (initialized)
-               return err;
-
-       initialized = true;
-       err = init_probe_symbol_maps(false);
-       if (err < 0)
-               pr_debug("Failed to init_probe_symbol_maps\n");
-       probe_conf.max_probes = MAX_PROBES;
-       return err;
-}
-
-static int
-preproc_gen_prologue(struct bpf_program *prog, int n,
-                    const struct bpf_insn *orig_insns, int orig_insns_cnt,
-                    struct bpf_preproc_result *res)
-{
-       struct bpf_prog_priv *priv = program_priv(prog);
-       struct probe_trace_event *tev;
-       struct perf_probe_event *pev;
-       struct bpf_insn *buf;
-       size_t prologue_cnt = 0;
-       int i, err;
-
-       if (IS_ERR_OR_NULL(priv) || priv->is_tp)
-               goto errout;
-
-       pev = &priv->pev;
-
-       if (n < 0 || n >= priv->nr_types)
-               goto errout;
-
-       /* Find a tev belongs to that type */
-       for (i = 0; i < pev->ntevs; i++) {
-               if (priv->type_mapping[i] == n)
-                       break;
-       }
-
-       if (i >= pev->ntevs) {
-               pr_debug("Internal error: prologue type %d not found\n", n);
-               return -BPF_LOADER_ERRNO__PROLOGUE;
-       }
-
-       tev = &pev->tevs[i];
-
-       buf = priv->insns_buf;
-       err = bpf__gen_prologue(tev->args, tev->nargs,
-                               buf, &prologue_cnt,
-                               BPF_MAXINSNS - orig_insns_cnt);
-       if (err) {
-               const char *title;
-
-               title = bpf_program__section_name(prog);
-               pr_debug("Failed to generate prologue for program %s\n",
-                        title);
-               return err;
-       }
-
-       memcpy(&buf[prologue_cnt], orig_insns,
-              sizeof(struct bpf_insn) * orig_insns_cnt);
-
-       res->new_insn_ptr = buf;
-       res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
-       return 0;
-
-errout:
-       pr_debug("Internal error in preproc_gen_prologue\n");
-       return -BPF_LOADER_ERRNO__PROLOGUE;
-}
-
-/*
- * compare_tev_args is reflexive, transitive and antisymmetric.
- * I can proof it but this margin is too narrow to contain.
- */
-static int compare_tev_args(const void *ptev1, const void *ptev2)
-{
-       int i, ret;
-       const struct probe_trace_event *tev1 =
-               *(const struct probe_trace_event **)ptev1;
-       const struct probe_trace_event *tev2 =
-               *(const struct probe_trace_event **)ptev2;
-
-       ret = tev2->nargs - tev1->nargs;
-       if (ret)
-               return ret;
-
-       for (i = 0; i < tev1->nargs; i++) {
-               struct probe_trace_arg *arg1, *arg2;
-               struct probe_trace_arg_ref *ref1, *ref2;
-
-               arg1 = &tev1->args[i];
-               arg2 = &tev2->args[i];
-
-               ret = strcmp(arg1->value, arg2->value);
-               if (ret)
-                       return ret;
-
-               ref1 = arg1->ref;
-               ref2 = arg2->ref;
-
-               while (ref1 && ref2) {
-                       ret = ref2->offset - ref1->offset;
-                       if (ret)
-                               return ret;
-
-                       ref1 = ref1->next;
-                       ref2 = ref2->next;
-               }
-
-               if (ref1 || ref2)
-                       return ref2 ? 1 : -1;
-       }
-
-       return 0;
-}
-
-/*
- * Assign a type number to each tevs in a pev.
- * mapping is an array with same slots as tevs in that pev.
- * nr_types will be set to number of types.
- */
-static int map_prologue(struct perf_probe_event *pev, int *mapping,
-                       int *nr_types)
-{
-       int i, type = 0;
-       struct probe_trace_event **ptevs;
-
-       size_t array_sz = sizeof(*ptevs) * pev->ntevs;
-
-       ptevs = malloc(array_sz);
-       if (!ptevs) {
-               pr_debug("Not enough memory: alloc ptevs failed\n");
-               return -ENOMEM;
-       }
-
-       pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
-       for (i = 0; i < pev->ntevs; i++)
-               ptevs[i] = &pev->tevs[i];
-
-       qsort(ptevs, pev->ntevs, sizeof(*ptevs),
-             compare_tev_args);
-
-       for (i = 0; i < pev->ntevs; i++) {
-               int n;
-
-               n = ptevs[i] - pev->tevs;
-               if (i == 0) {
-                       mapping[n] = type;
-                       pr_debug("mapping[%d]=%d\n", n, type);
-                       continue;
-               }
-
-               if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
-                       mapping[n] = type;
-               else
-                       mapping[n] = ++type;
-
-               pr_debug("mapping[%d]=%d\n", n, mapping[n]);
-       }
-       free(ptevs);
-       *nr_types = type + 1;
-
-       return 0;
-}
-
-static int hook_load_preprocessor(struct bpf_program *prog)
-{
-       struct bpf_prog_priv *priv = program_priv(prog);
-       struct perf_probe_event *pev;
-       bool need_prologue = false;
-       int i;
-
-       if (IS_ERR_OR_NULL(priv)) {
-               pr_debug("Internal error when hook preprocessor\n");
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-
-       if (priv->is_tp) {
-               priv->need_prologue = false;
-               return 0;
-       }
-
-       pev = &priv->pev;
-       for (i = 0; i < pev->ntevs; i++) {
-               struct probe_trace_event *tev = &pev->tevs[i];
-
-               if (tev->nargs > 0) {
-                       need_prologue = true;
-                       break;
-               }
-       }
-
-       /*
-        * Since all tevs don't have argument, we don't need generate
-        * prologue.
-        */
-       if (!need_prologue) {
-               priv->need_prologue = false;
-               return 0;
-       }
-
-       priv->need_prologue = true;
-       priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
-       if (!priv->insns_buf) {
-               pr_debug("Not enough memory: alloc insns_buf failed\n");
-               return -ENOMEM;
-       }
-
-       priv->prologue_fds = malloc(sizeof(int) * pev->ntevs);
-       if (!priv->prologue_fds) {
-               pr_debug("Not enough memory: alloc prologue fds failed\n");
-               return -ENOMEM;
-       }
-       memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs);
-
-       priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
-       if (!priv->type_mapping) {
-               pr_debug("Not enough memory: alloc type_mapping failed\n");
-               return -ENOMEM;
-       }
-       memset(priv->type_mapping, -1,
-              sizeof(int) * pev->ntevs);
-
-       return map_prologue(pev, priv->type_mapping, &priv->nr_types);
-}
-
-int bpf__probe(struct bpf_object *obj)
-{
-       int err = 0;
-       struct bpf_program *prog;
-       struct bpf_prog_priv *priv;
-       struct perf_probe_event *pev;
-
-       err = bpf__prepare_probe();
-       if (err) {
-               pr_debug("bpf__prepare_probe failed\n");
-               return err;
-       }
-
-       bpf_object__for_each_program(prog, obj) {
-               err = config_bpf_program(prog);
-               if (err)
-                       goto out;
-
-               priv = program_priv(prog);
-               if (IS_ERR_OR_NULL(priv)) {
-                       if (!priv)
-                               err = -BPF_LOADER_ERRNO__INTERNAL;
-                       else
-                               err = PTR_ERR(priv);
-                       goto out;
-               }
-
-               if (priv->is_tp) {
-                       bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
-                       continue;
-               }
-
-               bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE);
-               pev = &priv->pev;
-
-               err = convert_perf_probe_events(pev, 1);
-               if (err < 0) {
-                       pr_debug("bpf_probe: failed to convert perf probe events\n");
-                       goto out;
-               }
-
-               err = apply_perf_probe_events(pev, 1);
-               if (err < 0) {
-                       pr_debug("bpf_probe: failed to apply perf probe events\n");
-                       goto out;
-               }
-
-               /*
-                * After probing, let's consider prologue, which
-                * adds program fetcher to BPF programs.
-                *
-                * hook_load_preprocessor() hooks pre-processor
-                * to bpf_program, let it generate prologue
-                * dynamically during loading.
-                */
-               err = hook_load_preprocessor(prog);
-               if (err)
-                       goto out;
-       }
-out:
-       return err < 0 ? err : 0;
-}
-
-#define EVENTS_WRITE_BUFSIZE  4096
-int bpf__unprobe(struct bpf_object *obj)
-{
-       int err, ret = 0;
-       struct bpf_program *prog;
-
-       bpf_object__for_each_program(prog, obj) {
-               struct bpf_prog_priv *priv = program_priv(prog);
-               int i;
-
-               if (IS_ERR_OR_NULL(priv) || priv->is_tp)
-                       continue;
-
-               for (i = 0; i < priv->pev.ntevs; i++) {
-                       struct probe_trace_event *tev = &priv->pev.tevs[i];
-                       char name_buf[EVENTS_WRITE_BUFSIZE];
-                       struct strfilter *delfilter;
-
-                       snprintf(name_buf, EVENTS_WRITE_BUFSIZE,
-                                "%s:%s", tev->group, tev->event);
-                       name_buf[EVENTS_WRITE_BUFSIZE - 1] = '\0';
-
-                       delfilter = strfilter__new(name_buf, NULL);
-                       if (!delfilter) {
-                               pr_debug("Failed to create filter for unprobing\n");
-                               ret = -ENOMEM;
-                               continue;
-                       }
-
-                       err = del_perf_probe_events(delfilter);
-                       strfilter__delete(delfilter);
-                       if (err) {
-                               pr_debug("Failed to delete %s\n", name_buf);
-                               ret = err;
-                               continue;
-                       }
-               }
-       }
-       return ret;
-}
-
-static int bpf_object__load_prologue(struct bpf_object *obj)
-{
-       int init_cnt = ARRAY_SIZE(prologue_init_insn);
-       const struct bpf_insn *orig_insns;
-       struct bpf_preproc_result res;
-       struct perf_probe_event *pev;
-       struct bpf_program *prog;
-       int orig_insns_cnt;
-
-       bpf_object__for_each_program(prog, obj) {
-               struct bpf_prog_priv *priv = program_priv(prog);
-               int err, i, fd;
-
-               if (IS_ERR_OR_NULL(priv)) {
-                       pr_debug("bpf: failed to get private field\n");
-                       return -BPF_LOADER_ERRNO__INTERNAL;
-               }
-
-               if (!priv->need_prologue)
-                       continue;
-
-               /*
-                * For each program that needs prologue we do following:
-                *
-                * - take its current instructions and use them
-                *   to generate the new code with prologue
-                * - load new instructions with bpf_prog_load
-                *   and keep the fd in prologue_fds
-                * - new fd will be used in bpf__foreach_event
-                *   to connect this program with perf evsel
-                */
-               orig_insns = bpf_program__insns(prog);
-               orig_insns_cnt = bpf_program__insn_cnt(prog);
-
-               pev = &priv->pev;
-               for (i = 0; i < pev->ntevs; i++) {
-                       /*
-                        * Skipping artificall prologue_init_insn instructions
-                        * (init_cnt), so the prologue can be generated instead
-                        * of them.
-                        */
-                       err = preproc_gen_prologue(prog, i,
-                                                  orig_insns + init_cnt,
-                                                  orig_insns_cnt - init_cnt,
-                                                  &res);
-                       if (err)
-                               return err;
-
-                       fd = bpf_prog_load(bpf_program__get_type(prog),
-                                          bpf_program__name(prog), "GPL",
-                                          res.new_insn_ptr,
-                                          res.new_insn_cnt, NULL);
-                       if (fd < 0) {
-                               char bf[128];
-
-                               libbpf_strerror(-errno, bf, sizeof(bf));
-                               pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n",
-                                        -errno, bf);
-                               return -errno;
-                       }
-                       priv->prologue_fds[i] = fd;
-               }
-               /*
-                * We no longer need the original program,
-                * we can unload it.
-                */
-               bpf_program__unload(prog);
-       }
-       return 0;
-}
-
-int bpf__load(struct bpf_object *obj)
-{
-       int err;
-
-       err = bpf_object__load(obj);
-       if (err) {
-               char bf[128];
-               libbpf_strerror(err, bf, sizeof(bf));
-               pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf);
-               return err;
-       }
-       return bpf_object__load_prologue(obj);
-}
-
-int bpf__foreach_event(struct bpf_object *obj,
-                      bpf_prog_iter_callback_t func,
-                      void *arg)
-{
-       struct bpf_program *prog;
-       int err;
-
-       bpf_object__for_each_program(prog, obj) {
-               struct bpf_prog_priv *priv = program_priv(prog);
-               struct probe_trace_event *tev;
-               struct perf_probe_event *pev;
-               int i, fd;
-
-               if (IS_ERR_OR_NULL(priv)) {
-                       pr_debug("bpf: failed to get private field\n");
-                       return -BPF_LOADER_ERRNO__INTERNAL;
-               }
-
-               if (priv->is_tp) {
-                       fd = bpf_program__fd(prog);
-                       err = (*func)(priv->sys_name, priv->evt_name, fd, obj, arg);
-                       if (err) {
-                               pr_debug("bpf: tracepoint call back failed, stop iterate\n");
-                               return err;
-                       }
-                       continue;
-               }
-
-               pev = &priv->pev;
-               for (i = 0; i < pev->ntevs; i++) {
-                       tev = &pev->tevs[i];
-
-                       if (priv->need_prologue)
-                               fd = priv->prologue_fds[i];
-                       else
-                               fd = bpf_program__fd(prog);
-
-                       if (fd < 0) {
-                               pr_debug("bpf: failed to get file descriptor\n");
-                               return fd;
-                       }
-
-                       err = (*func)(tev->group, tev->event, fd, obj, arg);
-                       if (err) {
-                               pr_debug("bpf: call back failed, stop iterate\n");
-                               return err;
-                       }
-               }
-       }
-       return 0;
-}
-
-enum bpf_map_op_type {
-       BPF_MAP_OP_SET_VALUE,
-       BPF_MAP_OP_SET_EVSEL,
-};
-
-enum bpf_map_key_type {
-       BPF_MAP_KEY_ALL,
-       BPF_MAP_KEY_RANGES,
-};
-
-struct bpf_map_op {
-       struct list_head list;
-       enum bpf_map_op_type op_type;
-       enum bpf_map_key_type key_type;
-       union {
-               struct parse_events_array array;
-       } k;
-       union {
-               u64 value;
-               struct evsel *evsel;
-       } v;
-};
-
-struct bpf_map_priv {
-       struct list_head ops_list;
-};
-
-static void
-bpf_map_op__delete(struct bpf_map_op *op)
-{
-       if (!list_empty(&op->list))
-               list_del_init(&op->list);
-       if (op->key_type == BPF_MAP_KEY_RANGES)
-               parse_events__clear_array(&op->k.array);
-       free(op);
-}
-
-static void
-bpf_map_priv__purge(struct bpf_map_priv *priv)
-{
-       struct bpf_map_op *pos, *n;
-
-       list_for_each_entry_safe(pos, n, &priv->ops_list, list) {
-               list_del_init(&pos->list);
-               bpf_map_op__delete(pos);
-       }
-}
-
-static void
-bpf_map_priv__clear(const struct bpf_map *map __maybe_unused,
-                   void *_priv)
-{
-       struct bpf_map_priv *priv = _priv;
-
-       bpf_map_priv__purge(priv);
-       free(priv);
-}
-
-static void *map_priv(const struct bpf_map *map)
-{
-       void *priv;
-
-       if (IS_ERR_OR_NULL(bpf_map_hash))
-               return NULL;
-       if (!hashmap__find(bpf_map_hash, map, &priv))
-               return NULL;
-       return priv;
-}
-
-static void bpf_map_hash_free(void)
-{
-       struct hashmap_entry *cur;
-       size_t bkt;
-
-       if (IS_ERR_OR_NULL(bpf_map_hash))
-               return;
-
-       hashmap__for_each_entry(bpf_map_hash, cur, bkt)
-               bpf_map_priv__clear(cur->pkey, cur->pvalue);
-
-       hashmap__free(bpf_map_hash);
-       bpf_map_hash = NULL;
-}
-
-static int map_set_priv(struct bpf_map *map, void *priv)
-{
-       void *old_priv;
-
-       if (WARN_ON_ONCE(IS_ERR(bpf_map_hash)))
-               return PTR_ERR(bpf_program_hash);
-
-       if (!bpf_map_hash) {
-               bpf_map_hash = hashmap__new(ptr_hash, ptr_equal, NULL);
-               if (IS_ERR(bpf_map_hash))
-                       return PTR_ERR(bpf_map_hash);
-       }
-
-       old_priv = map_priv(map);
-       if (old_priv) {
-               bpf_map_priv__clear(map, old_priv);
-               return hashmap__set(bpf_map_hash, map, priv, NULL, NULL);
-       }
-       return hashmap__add(bpf_map_hash, map, priv);
-}
-
-static int
-bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
-{
-       op->key_type = BPF_MAP_KEY_ALL;
-       if (!term)
-               return 0;
-
-       if (term->array.nr_ranges) {
-               size_t memsz = term->array.nr_ranges *
-                               sizeof(op->k.array.ranges[0]);
-
-               op->k.array.ranges = memdup(term->array.ranges, memsz);
-               if (!op->k.array.ranges) {
-                       pr_debug("Not enough memory to alloc indices for map\n");
-                       return -ENOMEM;
-               }
-               op->key_type = BPF_MAP_KEY_RANGES;
-               op->k.array.nr_ranges = term->array.nr_ranges;
-       }
-       return 0;
-}
-
-static struct bpf_map_op *
-bpf_map_op__new(struct parse_events_term *term)
-{
-       struct bpf_map_op *op;
-       int err;
-
-       op = zalloc(sizeof(*op));
-       if (!op) {
-               pr_debug("Failed to alloc bpf_map_op\n");
-               return ERR_PTR(-ENOMEM);
-       }
-       INIT_LIST_HEAD(&op->list);
-
-       err = bpf_map_op_setkey(op, term);
-       if (err) {
-               free(op);
-               return ERR_PTR(err);
-       }
-       return op;
-}
-
-static struct bpf_map_op *
-bpf_map_op__clone(struct bpf_map_op *op)
-{
-       struct bpf_map_op *newop;
-
-       newop = memdup(op, sizeof(*op));
-       if (!newop) {
-               pr_debug("Failed to alloc bpf_map_op\n");
-               return NULL;
-       }
-
-       INIT_LIST_HEAD(&newop->list);
-       if (op->key_type == BPF_MAP_KEY_RANGES) {
-               size_t memsz = op->k.array.nr_ranges *
-                              sizeof(op->k.array.ranges[0]);
-
-               newop->k.array.ranges = memdup(op->k.array.ranges, memsz);
-               if (!newop->k.array.ranges) {
-                       pr_debug("Failed to alloc indices for map\n");
-                       free(newop);
-                       return NULL;
-               }
-       }
-
-       return newop;
-}
-
-static struct bpf_map_priv *
-bpf_map_priv__clone(struct bpf_map_priv *priv)
-{
-       struct bpf_map_priv *newpriv;
-       struct bpf_map_op *pos, *newop;
-
-       newpriv = zalloc(sizeof(*newpriv));
-       if (!newpriv) {
-               pr_debug("Not enough memory to alloc map private\n");
-               return NULL;
-       }
-       INIT_LIST_HEAD(&newpriv->ops_list);
-
-       list_for_each_entry(pos, &priv->ops_list, list) {
-               newop = bpf_map_op__clone(pos);
-               if (!newop) {
-                       bpf_map_priv__purge(newpriv);
-                       return NULL;
-               }
-               list_add_tail(&newop->list, &newpriv->ops_list);
-       }
-
-       return newpriv;
-}
-
-static int
-bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
-{
-       const char *map_name = bpf_map__name(map);
-       struct bpf_map_priv *priv = map_priv(map);
-
-       if (IS_ERR(priv)) {
-               pr_debug("Failed to get private from map %s\n", map_name);
-               return PTR_ERR(priv);
-       }
-
-       if (!priv) {
-               priv = zalloc(sizeof(*priv));
-               if (!priv) {
-                       pr_debug("Not enough memory to alloc map private\n");
-                       return -ENOMEM;
-               }
-               INIT_LIST_HEAD(&priv->ops_list);
-
-               if (map_set_priv(map, priv)) {
-                       free(priv);
-                       return -BPF_LOADER_ERRNO__INTERNAL;
-               }
-       }
-
-       list_add_tail(&op->list, &priv->ops_list);
-       return 0;
-}
-
-static struct bpf_map_op *
-bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
-{
-       struct bpf_map_op *op;
-       int err;
-
-       op = bpf_map_op__new(term);
-       if (IS_ERR(op))
-               return op;
-
-       err = bpf_map__add_op(map, op);
-       if (err) {
-               bpf_map_op__delete(op);
-               return ERR_PTR(err);
-       }
-       return op;
-}
-
-static int
-__bpf_map__config_value(struct bpf_map *map,
-                       struct parse_events_term *term)
-{
-       struct bpf_map_op *op;
-       const char *map_name = bpf_map__name(map);
-
-       if (!map) {
-               pr_debug("Map '%s' is invalid\n", map_name);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-
-       if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) {
-               pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
-                        map_name);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
-       }
-       if (bpf_map__key_size(map) < sizeof(unsigned int)) {
-               pr_debug("Map %s has incorrect key size\n", map_name);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
-       }
-       switch (bpf_map__value_size(map)) {
-       case 1:
-       case 2:
-       case 4:
-       case 8:
-               break;
-       default:
-               pr_debug("Map %s has incorrect value size\n", map_name);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
-       }
-
-       op = bpf_map__add_newop(map, term);
-       if (IS_ERR(op))
-               return PTR_ERR(op);
-       op->op_type = BPF_MAP_OP_SET_VALUE;
-       op->v.value = term->val.num;
-       return 0;
-}
-
-static int
-bpf_map__config_value(struct bpf_map *map,
-                     struct parse_events_term *term,
-                     struct evlist *evlist __maybe_unused)
-{
-       if (!term->err_val) {
-               pr_debug("Config value not set\n");
-               return -BPF_LOADER_ERRNO__OBJCONF_CONF;
-       }
-
-       if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
-               pr_debug("ERROR: wrong value type for 'value'\n");
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
-       }
-
-       return __bpf_map__config_value(map, term);
-}
-
-static int
-__bpf_map__config_event(struct bpf_map *map,
-                       struct parse_events_term *term,
-                       struct evlist *evlist)
-{
-       struct bpf_map_op *op;
-       const char *map_name = bpf_map__name(map);
-       struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
-
-       if (!evsel) {
-               pr_debug("Event (for '%s') '%s' doesn't exist\n",
-                        map_name, term->val.str);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
-       }
-
-       if (!map) {
-               pr_debug("Map '%s' is invalid\n", map_name);
-               return PTR_ERR(map);
-       }
-
-       /*
-        * No need to check key_size and value_size:
-        * kernel has already checked them.
-        */
-       if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
-               pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
-                        map_name);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
-       }
-
-       op = bpf_map__add_newop(map, term);
-       if (IS_ERR(op))
-               return PTR_ERR(op);
-       op->op_type = BPF_MAP_OP_SET_EVSEL;
-       op->v.evsel = evsel;
-       return 0;
-}
-
-static int
-bpf_map__config_event(struct bpf_map *map,
-                     struct parse_events_term *term,
-                     struct evlist *evlist)
-{
-       if (!term->err_val) {
-               pr_debug("Config value not set\n");
-               return -BPF_LOADER_ERRNO__OBJCONF_CONF;
-       }
-
-       if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) {
-               pr_debug("ERROR: wrong value type for 'event'\n");
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
-       }
-
-       return __bpf_map__config_event(map, term, evlist);
-}
-
-struct bpf_obj_config__map_func {
-       const char *config_opt;
-       int (*config_func)(struct bpf_map *, struct parse_events_term *,
-                          struct evlist *);
-};
-
-struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
-       {"value", bpf_map__config_value},
-       {"event", bpf_map__config_event},
-};
-
-static int
-config_map_indices_range_check(struct parse_events_term *term,
-                              struct bpf_map *map,
-                              const char *map_name)
-{
-       struct parse_events_array *array = &term->array;
-       unsigned int i;
-
-       if (!array->nr_ranges)
-               return 0;
-       if (!array->ranges) {
-               pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
-                        map_name, (int)array->nr_ranges);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-
-       if (!map) {
-               pr_debug("Map '%s' is invalid\n", map_name);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-
-       for (i = 0; i < array->nr_ranges; i++) {
-               unsigned int start = array->ranges[i].start;
-               size_t length = array->ranges[i].length;
-               unsigned int idx = start + length - 1;
-
-               if (idx >= bpf_map__max_entries(map)) {
-                       pr_debug("ERROR: index %d too large\n", idx);
-                       return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
-               }
-       }
-       return 0;
-}
-
-static int
-bpf__obj_config_map(struct bpf_object *obj,
-                   struct parse_events_term *term,
-                   struct evlist *evlist,
-                   int *key_scan_pos)
-{
-       /* key is "map:<mapname>.<config opt>" */
-       char *map_name = strdup(term->config + sizeof("map:") - 1);
-       struct bpf_map *map;
-       int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
-       char *map_opt;
-       size_t i;
-
-       if (!map_name)
-               return -ENOMEM;
-
-       map_opt = strchr(map_name, '.');
-       if (!map_opt) {
-               pr_debug("ERROR: Invalid map config: %s\n", map_name);
-               goto out;
-       }
-
-       *map_opt++ = '\0';
-       if (*map_opt == '\0') {
-               pr_debug("ERROR: Invalid map option: %s\n", term->config);
-               goto out;
-       }
-
-       map = bpf_object__find_map_by_name(obj, map_name);
-       if (!map) {
-               pr_debug("ERROR: Map %s doesn't exist\n", map_name);
-               err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
-               goto out;
-       }
-
-       *key_scan_pos += strlen(map_opt);
-       err = config_map_indices_range_check(term, map, map_name);
-       if (err)
-               goto out;
-       *key_scan_pos -= strlen(map_opt);
-
-       for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
-               struct bpf_obj_config__map_func *func =
-                               &bpf_obj_config__map_funcs[i];
-
-               if (strcmp(map_opt, func->config_opt) == 0) {
-                       err = func->config_func(map, term, evlist);
-                       goto out;
-               }
-       }
-
-       pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
-       err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
-out:
-       if (!err)
-               *key_scan_pos += strlen(map_opt);
-
-       free(map_name);
-       return err;
-}
-
-int bpf__config_obj(struct bpf_object *obj,
-                   struct parse_events_term *term,
-                   struct evlist *evlist,
-                   int *error_pos)
-{
-       int key_scan_pos = 0;
-       int err;
-
-       if (!obj || !term || !term->config)
-               return -EINVAL;
-
-       if (strstarts(term->config, "map:")) {
-               key_scan_pos = sizeof("map:") - 1;
-               err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
-               goto out;
-       }
-       err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
-out:
-       if (error_pos)
-               *error_pos = key_scan_pos;
-       return err;
-
-}
-
-typedef int (*map_config_func_t)(const char *name, int map_fd,
-                                const struct bpf_map *map,
-                                struct bpf_map_op *op,
-                                void *pkey, void *arg);
-
-static int
-foreach_key_array_all(map_config_func_t func,
-                     void *arg, const char *name,
-                     int map_fd, const struct bpf_map *map,
-                     struct bpf_map_op *op)
-{
-       unsigned int i;
-       int err;
-
-       for (i = 0; i < bpf_map__max_entries(map); i++) {
-               err = func(name, map_fd, map, op, &i, arg);
-               if (err) {
-                       pr_debug("ERROR: failed to insert value to %s[%u]\n",
-                                name, i);
-                       return err;
-               }
-       }
-       return 0;
-}
-
-static int
-foreach_key_array_ranges(map_config_func_t func, void *arg,
-                        const char *name, int map_fd,
-                        const struct bpf_map *map,
-                        struct bpf_map_op *op)
-{
-       unsigned int i, j;
-       int err;
-
-       for (i = 0; i < op->k.array.nr_ranges; i++) {
-               unsigned int start = op->k.array.ranges[i].start;
-               size_t length = op->k.array.ranges[i].length;
-
-               for (j = 0; j < length; j++) {
-                       unsigned int idx = start + j;
-
-                       err = func(name, map_fd, map, op, &idx, arg);
-                       if (err) {
-                               pr_debug("ERROR: failed to insert value to %s[%u]\n",
-                                        name, idx);
-                               return err;
-                       }
-               }
-       }
-       return 0;
-}
-
-static int
-bpf_map_config_foreach_key(struct bpf_map *map,
-                          map_config_func_t func,
-                          void *arg)
-{
-       int err, map_fd, type;
-       struct bpf_map_op *op;
-       const char *name = bpf_map__name(map);
-       struct bpf_map_priv *priv = map_priv(map);
-
-       if (IS_ERR(priv)) {
-               pr_debug("ERROR: failed to get private from map %s\n", name);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-       if (!priv || list_empty(&priv->ops_list)) {
-               pr_debug("INFO: nothing to config for map %s\n", name);
-               return 0;
-       }
-
-       if (!map) {
-               pr_debug("Map '%s' is invalid\n", name);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-       map_fd = bpf_map__fd(map);
-       if (map_fd < 0) {
-               pr_debug("ERROR: failed to get fd from map %s\n", name);
-               return map_fd;
-       }
-
-       type = bpf_map__type(map);
-       list_for_each_entry(op, &priv->ops_list, list) {
-               switch (type) {
-               case BPF_MAP_TYPE_ARRAY:
-               case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
-                       switch (op->key_type) {
-                       case BPF_MAP_KEY_ALL:
-                               err = foreach_key_array_all(func, arg, name,
-                                                           map_fd, map, op);
-                               break;
-                       case BPF_MAP_KEY_RANGES:
-                               err = foreach_key_array_ranges(func, arg, name,
-                                                              map_fd, map, op);
-                               break;
-                       default:
-                               pr_debug("ERROR: keytype for map '%s' invalid\n",
-                                        name);
-                               return -BPF_LOADER_ERRNO__INTERNAL;
-                       }
-                       if (err)
-                               return err;
-                       break;
-               default:
-                       pr_debug("ERROR: type of '%s' incorrect\n", name);
-                       return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
-               }
-       }
-
-       return 0;
-}
-
-static int
-apply_config_value_for_key(int map_fd, void *pkey,
-                          size_t val_size, u64 val)
-{
-       int err = 0;
-
-       switch (val_size) {
-       case 1: {
-               u8 _val = (u8)(val);
-               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
-               break;
-       }
-       case 2: {
-               u16 _val = (u16)(val);
-               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
-               break;
-       }
-       case 4: {
-               u32 _val = (u32)(val);
-               err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
-               break;
-       }
-       case 8: {
-               err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
-               break;
-       }
-       default:
-               pr_debug("ERROR: invalid value size\n");
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
-       }
-       if (err && errno)
-               err = -errno;
-       return err;
-}
-
-static int
-apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
-                          struct evsel *evsel)
-{
-       struct xyarray *xy = evsel->core.fd;
-       struct perf_event_attr *attr;
-       unsigned int key, events;
-       bool check_pass = false;
-       int *evt_fd;
-       int err;
-
-       if (!xy) {
-               pr_debug("ERROR: evsel not ready for map %s\n", name);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-
-       if (xy->row_size / xy->entry_size != 1) {
-               pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
-                        name);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
-       }
-
-       attr = &evsel->core.attr;
-       if (attr->inherit) {
-               pr_debug("ERROR: Can't put inherit event into map %s\n", name);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
-       }
-
-       if (evsel__is_bpf_output(evsel))
-               check_pass = true;
-       if (attr->type == PERF_TYPE_RAW)
-               check_pass = true;
-       if (attr->type == PERF_TYPE_HARDWARE)
-               check_pass = true;
-       if (!check_pass) {
-               pr_debug("ERROR: Event type is wrong for map %s\n", name);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
-       }
-
-       events = xy->entries / (xy->row_size / xy->entry_size);
-       key = *((unsigned int *)pkey);
-       if (key >= events) {
-               pr_debug("ERROR: there is no event %d for map %s\n",
-                        key, name);
-               return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
-       }
-       evt_fd = xyarray__entry(xy, key, 0);
-       err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
-       if (err && errno)
-               err = -errno;
-       return err;
-}
-
-static int
-apply_obj_config_map_for_key(const char *name, int map_fd,
-                            const struct bpf_map *map,
-                            struct bpf_map_op *op,
-                            void *pkey, void *arg __maybe_unused)
-{
-       int err;
-
-       switch (op->op_type) {
-       case BPF_MAP_OP_SET_VALUE:
-               err = apply_config_value_for_key(map_fd, pkey,
-                                                bpf_map__value_size(map),
-                                                op->v.value);
-               break;
-       case BPF_MAP_OP_SET_EVSEL:
-               err = apply_config_evsel_for_key(name, map_fd, pkey,
-                                                op->v.evsel);
-               break;
-       default:
-               pr_debug("ERROR: unknown value type for '%s'\n", name);
-               err = -BPF_LOADER_ERRNO__INTERNAL;
-       }
-       return err;
-}
-
-static int
-apply_obj_config_map(struct bpf_map *map)
-{
-       return bpf_map_config_foreach_key(map,
-                                         apply_obj_config_map_for_key,
-                                         NULL);
-}
-
-static int
-apply_obj_config_object(struct bpf_object *obj)
-{
-       struct bpf_map *map;
-       int err;
-
-       bpf_object__for_each_map(map, obj) {
-               err = apply_obj_config_map(map);
-               if (err)
-                       return err;
-       }
-       return 0;
-}
-
-int bpf__apply_obj_config(void)
-{
-       struct bpf_perf_object *perf_obj, *tmp;
-       int err;
-
-       bpf_perf_object__for_each(perf_obj, tmp) {
-               err = apply_obj_config_object(perf_obj->obj);
-               if (err)
-                       return err;
-       }
-
-       return 0;
-}
-
-#define bpf__perf_for_each_map(map, pobj, tmp)                 \
-       bpf_perf_object__for_each(pobj, tmp)                    \
-               bpf_object__for_each_map(map, pobj->obj)
-
-#define bpf__perf_for_each_map_named(map, pobj, pobjtmp, name) \
-       bpf__perf_for_each_map(map, pobj, pobjtmp)              \
-               if (bpf_map__name(map) && (strcmp(name, bpf_map__name(map)) == 0))
-
-struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name)
-{
-       struct bpf_map_priv *tmpl_priv = NULL;
-       struct bpf_perf_object *perf_obj, *tmp;
-       struct evsel *evsel = NULL;
-       struct bpf_map *map;
-       int err;
-       bool need_init = false;
-
-       bpf__perf_for_each_map_named(map, perf_obj, tmp, name) {
-               struct bpf_map_priv *priv = map_priv(map);
-
-               if (IS_ERR(priv))
-                       return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
-
-               /*
-                * No need to check map type: type should have been
-                * verified by kernel.
-                */
-               if (!need_init && !priv)
-                       need_init = !priv;
-               if (!tmpl_priv && priv)
-                       tmpl_priv = priv;
-       }
-
-       if (!need_init)
-               return NULL;
-
-       if (!tmpl_priv) {
-               char *event_definition = NULL;
-
-               if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0)
-                       return ERR_PTR(-ENOMEM);
-
-               err = parse_event(evlist, event_definition);
-               free(event_definition);
-
-               if (err) {
-                       pr_debug("ERROR: failed to create the \"%s\" bpf-output event\n", name);
-                       return ERR_PTR(-err);
-               }
-
-               evsel = evlist__last(evlist);
-       }
-
-       bpf__perf_for_each_map_named(map, perf_obj, tmp, name) {
-               struct bpf_map_priv *priv = map_priv(map);
-
-               if (IS_ERR(priv))
-                       return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
-               if (priv)
-                       continue;
-
-               if (tmpl_priv) {
-                       priv = bpf_map_priv__clone(tmpl_priv);
-                       if (!priv)
-                               return ERR_PTR(-ENOMEM);
-
-                       err = map_set_priv(map, priv);
-                       if (err) {
-                               bpf_map_priv__clear(map, priv);
-                               return ERR_PTR(err);
-                       }
-               } else if (evsel) {
-                       struct bpf_map_op *op;
-
-                       op = bpf_map__add_newop(map, NULL);
-                       if (IS_ERR(op))
-                               return ERR_CAST(op);
-                       op->op_type = BPF_MAP_OP_SET_EVSEL;
-                       op->v.evsel = evsel;
-               }
-       }
-
-       return evsel;
-}
-
-int bpf__setup_stdout(struct evlist *evlist)
-{
-       struct evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__");
-       return PTR_ERR_OR_ZERO(evsel);
-}
-
-#define ERRNO_OFFSET(e)                ((e) - __BPF_LOADER_ERRNO__START)
-#define ERRCODE_OFFSET(c)      ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
-#define NR_ERRNO       (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
-
-static const char *bpf_loader_strerror_table[NR_ERRNO] = {
-       [ERRCODE_OFFSET(CONFIG)]        = "Invalid config string",
-       [ERRCODE_OFFSET(GROUP)]         = "Invalid group name",
-       [ERRCODE_OFFSET(EVENTNAME)]     = "No event name found in config string",
-       [ERRCODE_OFFSET(INTERNAL)]      = "BPF loader internal error",
-       [ERRCODE_OFFSET(COMPILE)]       = "Error when compiling BPF scriptlet",
-       [ERRCODE_OFFSET(PROGCONF_TERM)] = "Invalid program config term in config string",
-       [ERRCODE_OFFSET(PROLOGUE)]      = "Failed to generate prologue",
-       [ERRCODE_OFFSET(PROLOGUE2BIG)]  = "Prologue too big for program",
-       [ERRCODE_OFFSET(PROLOGUEOOB)]   = "Offset out of bound for prologue",
-       [ERRCODE_OFFSET(OBJCONF_OPT)]   = "Invalid object config option",
-       [ERRCODE_OFFSET(OBJCONF_CONF)]  = "Config value not set (missing '=')",
-       [ERRCODE_OFFSET(OBJCONF_MAP_OPT)]       = "Invalid object map config option",
-       [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)]  = "Target map doesn't exist",
-       [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)]     = "Incorrect value type for map",
-       [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)]      = "Incorrect map type",
-       [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)]   = "Incorrect map key size",
-       [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
-       [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)]     = "Event not found for map setting",
-       [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)]   = "Invalid map size for event setting",
-       [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)]    = "Event dimension too large",
-       [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)]    = "Doesn't support inherit event",
-       [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)]   = "Wrong event type for map",
-       [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)]   = "Index too large",
-};
-
-static int
-bpf_loader_strerror(int err, char *buf, size_t size)
-{
-       char sbuf[STRERR_BUFSIZE];
-       const char *msg;
-
-       if (!buf || !size)
-               return -1;
-
-       err = err > 0 ? err : -err;
-
-       if (err >= __LIBBPF_ERRNO__START)
-               return libbpf_strerror(err, buf, size);
-
-       if (err >= __BPF_LOADER_ERRNO__START && err < __BPF_LOADER_ERRNO__END) {
-               msg = bpf_loader_strerror_table[ERRNO_OFFSET(err)];
-               snprintf(buf, size, "%s", msg);
-               buf[size - 1] = '\0';
-               return 0;
-       }
-
-       if (err >= __BPF_LOADER_ERRNO__END)
-               snprintf(buf, size, "Unknown bpf loader error %d", err);
-       else
-               snprintf(buf, size, "%s",
-                        str_error_r(err, sbuf, sizeof(sbuf)));
-
-       buf[size - 1] = '\0';
-       return -1;
-}
-
-#define bpf__strerror_head(err, buf, size) \
-       char sbuf[STRERR_BUFSIZE], *emsg;\
-       if (!size)\
-               return 0;\
-       if (err < 0)\
-               err = -err;\
-       bpf_loader_strerror(err, sbuf, sizeof(sbuf));\
-       emsg = sbuf;\
-       switch (err) {\
-       default:\
-               scnprintf(buf, size, "%s", emsg);\
-               break;
-
-#define bpf__strerror_entry(val, fmt...)\
-       case val: {\
-               scnprintf(buf, size, fmt);\
-               break;\
-       }
-
-#define bpf__strerror_end(buf, size)\
-       }\
-       buf[size - 1] = '\0';
-
-int bpf__strerror_prepare_load(const char *filename, bool source,
-                              int err, char *buf, size_t size)
-{
-       size_t n;
-       int ret;
-
-       n = snprintf(buf, size, "Failed to load %s%s: ",
-                        filename, source ? " from source" : "");
-       if (n >= size) {
-               buf[size - 1] = '\0';
-               return 0;
-       }
-       buf += n;
-       size -= n;
-
-       ret = bpf_loader_strerror(err, buf, size);
-       buf[size - 1] = '\0';
-       return ret;
-}
-
-int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
-                       int err, char *buf, size_t size)
-{
-       bpf__strerror_head(err, buf, size);
-       case BPF_LOADER_ERRNO__PROGCONF_TERM: {
-               scnprintf(buf, size, "%s (add -v to see detail)", emsg);
-               break;
-       }
-       bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'");
-       bpf__strerror_entry(EACCES, "You need to be root");
-       bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
-       bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
-       bpf__strerror_end(buf, size);
-       return 0;
-}
-
-int bpf__strerror_load(struct bpf_object *obj,
-                      int err, char *buf, size_t size)
-{
-       bpf__strerror_head(err, buf, size);
-       case LIBBPF_ERRNO__KVER: {
-               unsigned int obj_kver = bpf_object__kversion(obj);
-               unsigned int real_kver;
-
-               if (fetch_kernel_version(&real_kver, NULL, 0)) {
-                       scnprintf(buf, size, "Unable to fetch kernel version");
-                       break;
-               }
-
-               if (obj_kver != real_kver) {
-                       scnprintf(buf, size,
-                                 "'version' ("KVER_FMT") doesn't match running kernel ("KVER_FMT")",
-                                 KVER_PARAM(obj_kver),
-                                 KVER_PARAM(real_kver));
-                       break;
-               }
-
-               scnprintf(buf, size, "Failed to load program for unknown reason");
-               break;
-       }
-       bpf__strerror_end(buf, size);
-       return 0;
-}
-
-int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
-                            struct parse_events_term *term __maybe_unused,
-                            struct evlist *evlist __maybe_unused,
-                            int *error_pos __maybe_unused, int err,
-                            char *buf, size_t size)
-{
-       bpf__strerror_head(err, buf, size);
-       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
-                           "Can't use this config term with this map type");
-       bpf__strerror_end(buf, size);
-       return 0;
-}
-
-int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
-{
-       bpf__strerror_head(err, buf, size);
-       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
-                           "Cannot set event to BPF map in multi-thread tracing");
-       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
-                           "%s (Hint: use -i to turn off inherit)", emsg);
-       bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
-                           "Can only put raw, hardware and BPF output event into a BPF map");
-       bpf__strerror_end(buf, size);
-       return 0;
-}
-
-int bpf__strerror_setup_output_event(struct evlist *evlist __maybe_unused,
-                                    int err, char *buf, size_t size)
-{
-       bpf__strerror_head(err, buf, size);
-       bpf__strerror_end(buf, size);
-       return 0;
-}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
deleted file mode 100644 (file)
index 5d1c725..0000000
+++ /dev/null
@@ -1,216 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2015, Huawei Inc.
- */
-#ifndef __BPF_LOADER_H
-#define __BPF_LOADER_H
-
-#include <linux/compiler.h>
-#include <linux/err.h>
-
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/libbpf.h>
-
-enum bpf_loader_errno {
-       __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100,
-       /* Invalid config string */
-       BPF_LOADER_ERRNO__CONFIG = __BPF_LOADER_ERRNO__START,
-       BPF_LOADER_ERRNO__GROUP,        /* Invalid group name */
-       BPF_LOADER_ERRNO__EVENTNAME,    /* Event name is missing */
-       BPF_LOADER_ERRNO__INTERNAL,     /* BPF loader internal error */
-       BPF_LOADER_ERRNO__COMPILE,      /* Error when compiling BPF scriptlet */
-       BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */
-       BPF_LOADER_ERRNO__PROLOGUE,     /* Failed to generate prologue */
-       BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
-       BPF_LOADER_ERRNO__PROLOGUEOOB,  /* Offset out of bound for prologue */
-       BPF_LOADER_ERRNO__OBJCONF_OPT,  /* Invalid object config option */
-       BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (lost '=')) */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_OPT,      /* Invalid object map config option */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map not exist */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE,    /* Incorrect value type for map */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,     /* Incorrect map type */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE,  /* Incorrect map key size */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT,    /* Event not found for map setting */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE,  /* Invalid map size for event setting */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,   /* Event dimension too large */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,   /* Doesn't support inherit event */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,  /* Wrong event type for map */
-       BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,  /* Index too large */
-       __BPF_LOADER_ERRNO__END,
-};
-#endif // HAVE_LIBBPF_SUPPORT
-
-struct evsel;
-struct evlist;
-struct bpf_object;
-struct parse_events_term;
-#define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
-
-typedef int (*bpf_prog_iter_callback_t)(const char *group, const char *event,
-                                       int fd, struct bpf_object *obj, void *arg);
-
-#ifdef HAVE_LIBBPF_SUPPORT
-struct bpf_object *bpf__prepare_load(const char *filename, bool source);
-int bpf__strerror_prepare_load(const char *filename, bool source,
-                              int err, char *buf, size_t size);
-
-struct bpf_object *bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz,
-                                           const char *name);
-
-void bpf__clear(void);
-
-int bpf__probe(struct bpf_object *obj);
-int bpf__unprobe(struct bpf_object *obj);
-int bpf__strerror_probe(struct bpf_object *obj, int err,
-                       char *buf, size_t size);
-
-int bpf__load(struct bpf_object *obj);
-int bpf__strerror_load(struct bpf_object *obj, int err,
-                      char *buf, size_t size);
-int bpf__foreach_event(struct bpf_object *obj,
-                      bpf_prog_iter_callback_t func, void *arg);
-
-int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
-                   struct evlist *evlist, int *error_pos);
-int bpf__strerror_config_obj(struct bpf_object *obj,
-                            struct parse_events_term *term,
-                            struct evlist *evlist,
-                            int *error_pos, int err, char *buf,
-                            size_t size);
-int bpf__apply_obj_config(void);
-int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
-
-int bpf__setup_stdout(struct evlist *evlist);
-struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name);
-int bpf__strerror_setup_output_event(struct evlist *evlist, int err, char *buf, size_t size);
-#else
-#include <errno.h>
-#include <string.h>
-#include "debug.h"
-
-static inline struct bpf_object *
-bpf__prepare_load(const char *filename __maybe_unused,
-                 bool source __maybe_unused)
-{
-       pr_debug("ERROR: eBPF object loading is disabled during compiling.\n");
-       return ERR_PTR(-ENOTSUP);
-}
-
-static inline struct bpf_object *
-bpf__prepare_load_buffer(void *obj_buf __maybe_unused,
-                                          size_t obj_buf_sz __maybe_unused)
-{
-       return ERR_PTR(-ENOTSUP);
-}
-
-static inline void bpf__clear(void) { }
-
-static inline int bpf__probe(struct bpf_object *obj __maybe_unused) { return 0;}
-static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 0;}
-static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; }
-
-static inline int
-bpf__foreach_event(struct bpf_object *obj __maybe_unused,
-                  bpf_prog_iter_callback_t func __maybe_unused,
-                  void *arg __maybe_unused)
-{
-       return 0;
-}
-
-static inline int
-bpf__config_obj(struct bpf_object *obj __maybe_unused,
-               struct parse_events_term *term __maybe_unused,
-               struct evlist *evlist __maybe_unused,
-               int *error_pos __maybe_unused)
-{
-       return 0;
-}
-
-static inline int
-bpf__apply_obj_config(void)
-{
-       return 0;
-}
-
-static inline int
-bpf__setup_stdout(struct evlist *evlist __maybe_unused)
-{
-       return 0;
-}
-
-static inline struct evsel *
-bpf__setup_output_event(struct evlist *evlist __maybe_unused, const char *name __maybe_unused)
-{
-       return NULL;
-}
-
-static inline int
-__bpf_strerror(char *buf, size_t size)
-{
-       if (!size)
-               return 0;
-       strncpy(buf,
-               "ERROR: eBPF object loading is disabled during compiling.\n",
-               size);
-       buf[size - 1] = '\0';
-       return 0;
-}
-
-static inline
-int bpf__strerror_prepare_load(const char *filename __maybe_unused,
-                              bool source __maybe_unused,
-                              int err __maybe_unused,
-                              char *buf, size_t size)
-{
-       return __bpf_strerror(buf, size);
-}
-
-static inline int
-bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
-                   int err __maybe_unused,
-                   char *buf, size_t size)
-{
-       return __bpf_strerror(buf, size);
-}
-
-static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused,
-                                    int err __maybe_unused,
-                                    char *buf, size_t size)
-{
-       return __bpf_strerror(buf, size);
-}
-
-static inline int
-bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
-                        struct parse_events_term *term __maybe_unused,
-                        struct evlist *evlist __maybe_unused,
-                        int *error_pos __maybe_unused,
-                        int err __maybe_unused,
-                        char *buf, size_t size)
-{
-       return __bpf_strerror(buf, size);
-}
-
-static inline int
-bpf__strerror_apply_obj_config(int err __maybe_unused,
-                              char *buf, size_t size)
-{
-       return __bpf_strerror(buf, size);
-}
-
-static inline int
-bpf__strerror_setup_output_event(struct evlist *evlist __maybe_unused,
-                                int err __maybe_unused, char *buf, size_t size)
-{
-       return __bpf_strerror(buf, size);
-}
-
-#endif
-
-static inline int bpf__strerror_setup_stdout(struct evlist *evlist, int err, char *buf, size_t size)
-{
-       return bpf__strerror_setup_output_event(evlist, err, buf, size);
-}
-#endif
@@ -2,22 +2,26 @@
 /*
  * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
  *
- * Test it with:
- *
- * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
- *
  * This exactly matches what is marshalled into the raw_syscall:sys_enter
  * payload expected by the 'perf trace' beautifiers.
- *
- * For now it just uses the existing tracepoint augmentation code in 'perf
- * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
- * code that will combine entry/exit in a strace like way.
  */
 
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <linux/limits.h>
 
+/**
+ * is_power_of_2() - check if a value is a power of two
+ * @n: the value to check
+ *
+ * Determine whether some value is a power of two, where zero is *not*
+ * considered a power of two.  Return: true if @n is a power of 2, otherwise
+ * false.
+ */
+#define is_power_of_2(n) (n != 0 && ((n & (n - 1)) == 0))
+
+#define MAX_CPUS  4096
+
 // FIXME: These should come from system headers
 typedef char bool;
 typedef int pid_t;
@@ -34,7 +38,7 @@ struct __augmented_syscalls__ {
        __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
        __type(key, int);
        __type(value, __u32);
-       __uint(max_entries, __NR_CPUS__);
+       __uint(max_entries, MAX_CPUS);
 } __augmented_syscalls__ SEC(".maps");
 
 /*
@@ -156,6 +160,7 @@ unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const
         */
        if (string_len > 0) {
                augmented_len -= sizeof(augmented_arg->value) - string_len;
+               _Static_assert(is_power_of_2(sizeof(augmented_arg->value)), "sizeof(augmented_arg->value) needs to be a power of two");
                augmented_len &= sizeof(augmented_arg->value) - 1;
                augmented_arg->size = string_len;
        } else {
@@ -170,7 +175,7 @@ unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const
        return augmented_len;
 }
 
-SEC("!raw_syscalls:unaugmented")
+SEC("tp/raw_syscalls/sys_enter")
 int syscall_unaugmented(struct syscall_enter_args *args)
 {
        return 1;
@@ -182,7 +187,7 @@ int syscall_unaugmented(struct syscall_enter_args *args)
  * on from there, reading the first syscall arg as a string, i.e. open's
  * filename.
  */
-SEC("!syscalls:sys_enter_connect")
+SEC("tp/syscalls/sys_enter_connect")
 int sys_enter_connect(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -193,15 +198,15 @@ int sys_enter_connect(struct syscall_enter_args *args)
         if (augmented_args == NULL)
                 return 1; /* Failure: don't filter */
 
-       if (socklen > sizeof(augmented_args->saddr))
-               socklen = sizeof(augmented_args->saddr);
+       _Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two");
+       socklen &= sizeof(augmented_args->saddr) - 1;
 
        bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
 
        return augmented__output(args, augmented_args, len + socklen);
 }
 
-SEC("!syscalls:sys_enter_sendto")
+SEC("tp/syscalls/sys_enter_sendto")
 int sys_enter_sendto(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -212,15 +217,14 @@ int sys_enter_sendto(struct syscall_enter_args *args)
         if (augmented_args == NULL)
                 return 1; /* Failure: don't filter */
 
-       if (socklen > sizeof(augmented_args->saddr))
-               socklen = sizeof(augmented_args->saddr);
+       socklen &= sizeof(augmented_args->saddr) - 1;
 
        bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
 
        return augmented__output(args, augmented_args, len + socklen);
 }
 
-SEC("!syscalls:sys_enter_open")
+SEC("tp/syscalls/sys_enter_open")
 int sys_enter_open(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -235,7 +239,7 @@ int sys_enter_open(struct syscall_enter_args *args)
        return augmented__output(args, augmented_args, len);
 }
 
-SEC("!syscalls:sys_enter_openat")
+SEC("tp/syscalls/sys_enter_openat")
 int sys_enter_openat(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -250,7 +254,7 @@ int sys_enter_openat(struct syscall_enter_args *args)
        return augmented__output(args, augmented_args, len);
 }
 
-SEC("!syscalls:sys_enter_rename")
+SEC("tp/syscalls/sys_enter_rename")
 int sys_enter_rename(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -267,7 +271,7 @@ int sys_enter_rename(struct syscall_enter_args *args)
        return augmented__output(args, augmented_args, len);
 }
 
-SEC("!syscalls:sys_enter_renameat")
+SEC("tp/syscalls/sys_enter_renameat")
 int sys_enter_renameat(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -295,7 +299,7 @@ struct perf_event_attr_size {
         __u32                   size;
 };
 
-SEC("!syscalls:sys_enter_perf_event_open")
+SEC("tp/syscalls/sys_enter_perf_event_open")
 int sys_enter_perf_event_open(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -327,7 +331,7 @@ failure:
        return 1; /* Failure: don't filter */
 }
 
-SEC("!syscalls:sys_enter_clock_nanosleep")
+SEC("tp/syscalls/sys_enter_clock_nanosleep")
 int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args = augmented_args_payload();
@@ -358,7 +362,7 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
        return bpf_map_lookup_elem(pids, &pid) != NULL;
 }
 
-SEC("raw_syscalls:sys_enter")
+SEC("tp/raw_syscalls/sys_enter")
 int sys_enter(struct syscall_enter_args *args)
 {
        struct augmented_args_payload *augmented_args;
@@ -371,7 +375,6 @@ int sys_enter(struct syscall_enter_args *args)
         * We'll add to this as we add augmented syscalls right after that
         * initial, non-augmented raw_syscalls:sys_enter payload.
         */
-       unsigned int len = sizeof(augmented_args->args);
 
        if (pid_filter__has(&pids_filtered, getpid()))
                return 0;
@@ -393,7 +396,7 @@ int sys_enter(struct syscall_enter_args *args)
        return 0;
 }
 
-SEC("raw_syscalls:sys_exit")
+SEC("tp/raw_syscalls/sys_exit")
 int sys_exit(struct syscall_exit_args *args)
 {
        struct syscall_exit_args exit_args;
diff --git a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c
new file mode 100644 (file)
index 0000000..2c55896
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2023 Red Hat
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+unsigned int nr_uprobes;
+
+SEC("uprobe")
+int BPF_UPROBE(empty)
+{
+       return 0;
+}
+
+SEC("uprobe")
+int BPF_UPROBE(trace_printk)
+{
+       char fmt[] = "perf bench uprobe %u";
+
+       bpf_trace_printk(fmt, sizeof(fmt), ++nr_uprobes);
+       return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
index 3672822..03c64b8 100644 (file)
@@ -560,7 +560,7 @@ char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
                               struct nsinfo *nsi, bool is_kallsyms,
                               bool is_vdso)
 {
-       char *realname = (char *)name, *filename;
+       char *realname = NULL, *filename;
        bool slash = is_kallsyms || is_vdso;
 
        if (!slash)
@@ -571,9 +571,7 @@ char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
                     sbuild_id ? "/" : "", sbuild_id ?: "") < 0)
                filename = NULL;
 
-       if (!slash)
-               free(realname);
-
+       free(realname);
        return filename;
 }
 
diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build
deleted file mode 100644 (file)
index 613ecfd..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-perf-$(CONFIG_CLANGLLVM) += clang.o
-perf-$(CONFIG_CLANGLLVM) += clang-test.o
diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h
deleted file mode 100644 (file)
index d3731a8..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PERF_UTIL_CLANG_C_H
-#define PERF_UTIL_CLANG_C_H
-
-#include <stddef.h>    /* for size_t */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef HAVE_LIBCLANGLLVM_SUPPORT
-extern void perf_clang__init(void);
-extern void perf_clang__cleanup(void);
-
-struct test_suite;
-extern int test__clang_to_IR(struct test_suite *test, int subtest);
-extern int test__clang_to_obj(struct test_suite *test, int subtest);
-
-extern int perf_clang__compile_bpf(const char *filename,
-                                  void **p_obj_buf,
-                                  size_t *p_obj_buf_sz);
-#else
-
-#include <errno.h>
-#include <linux/compiler.h>    /* for __maybe_unused */
-
-static inline void perf_clang__init(void) { }
-static inline void perf_clang__cleanup(void) { }
-
-static inline int
-perf_clang__compile_bpf(const char *filename __maybe_unused,
-                       void **p_obj_buf __maybe_unused,
-                       size_t *p_obj_buf_sz __maybe_unused)
-{
-       return -ENOTSUP;
-}
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-#endif
diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp
deleted file mode 100644 (file)
index a4683ca..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "clang.h"
-#include "clang-c.h"
-extern "C" {
-#include "../util.h"
-}
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-
-#include <tests/llvm.h>
-#include <string>
-
-class perf_clang_scope {
-public:
-       explicit perf_clang_scope() {perf_clang__init();}
-       ~perf_clang_scope() {perf_clang__cleanup();}
-};
-
-static std::unique_ptr<llvm::Module>
-__test__clang_to_IR(void)
-{
-       unsigned int kernel_version;
-
-       if (fetch_kernel_version(&kernel_version, NULL, 0))
-               return std::unique_ptr<llvm::Module>(nullptr);
-
-       std::string cflag_kver("-DLINUX_VERSION_CODE=" +
-                               std::to_string(kernel_version));
-
-       std::unique_ptr<llvm::Module> M =
-               perf::getModuleFromSource({cflag_kver.c_str()},
-                                         "perf-test.c",
-                                         test_llvm__bpf_base_prog);
-       return M;
-}
-
-extern "C" {
-int test__clang_to_IR(struct test_suite *test __maybe_unused,
-                      int subtest __maybe_unused)
-{
-       perf_clang_scope _scope;
-
-       auto M = __test__clang_to_IR();
-       if (!M)
-               return -1;
-       for (llvm::Function& F : *M)
-               if (F.getName() == "bpf_func__SyS_epoll_pwait")
-                       return 0;
-       return -1;
-}
-
-int test__clang_to_obj(struct test_suite *test __maybe_unused,
-                       int subtest __maybe_unused)
-{
-       perf_clang_scope _scope;
-
-       auto M = __test__clang_to_IR();
-       if (!M)
-               return -1;
-
-       auto Buffer = perf::getBPFObjectFromModule(&*M);
-       if (!Buffer)
-               return -1;
-       return 0;
-}
-
-}
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
deleted file mode 100644 (file)
index 1aad7d6..0000000
+++ /dev/null
@@ -1,225 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * llvm C frontend for perf. Support dynamically compile C file
- *
- * Inspired by clang example code:
- * http://llvm.org/svn/llvm-project/cfe/trunk/examples/clang-interpreter/main.cpp
- *
- * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2016 Huawei Inc.
- */
-
-#include "clang/Basic/Version.h"
-#include "clang/CodeGen/CodeGenAction.h"
-#include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/CompilerInstance.h"
-#include "clang/Frontend/TextDiagnosticPrinter.h"
-#include "clang/Tooling/Tooling.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Option/Option.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/ManagedStatic.h"
-#if CLANG_VERSION_MAJOR >= 14
-#include "llvm/MC/TargetRegistry.h"
-#else
-#include "llvm/Support/TargetRegistry.h"
-#endif
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include <memory>
-
-#include "clang.h"
-#include "clang-c.h"
-
-namespace perf {
-
-static std::unique_ptr<llvm::LLVMContext> LLVMCtx;
-
-using namespace clang;
-
-static CompilerInvocation *
-createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path,
-                        DiagnosticsEngine& Diags)
-{
-       llvm::opt::ArgStringList CCArgs {
-               "-cc1",
-               "-triple", "bpf-pc-linux",
-               "-fsyntax-only",
-               "-O2",
-               "-nostdsysteminc",
-               "-nobuiltininc",
-               "-vectorize-loops",
-               "-vectorize-slp",
-               "-Wno-unused-value",
-               "-Wno-pointer-sign",
-               "-x", "c"};
-
-       CCArgs.append(CFlags.begin(), CFlags.end());
-       CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs
-#if CLANG_VERSION_MAJOR >= 11
-                                                        ,/*BinaryName=*/nullptr
-#endif
-                                                        );
-
-       FrontendOptions& Opts = CI->getFrontendOpts();
-       Opts.Inputs.clear();
-       Opts.Inputs.emplace_back(Path,
-                       FrontendOptions::getInputKindForExtension("c"));
-       return CI;
-}
-
-static std::unique_ptr<llvm::Module>
-getModuleFromSource(llvm::opt::ArgStringList CFlags,
-                   StringRef Path, IntrusiveRefCntPtr<vfs::FileSystem> VFS)
-{
-       CompilerInstance Clang;
-       Clang.createDiagnostics();
-
-#if CLANG_VERSION_MAJOR < 9
-       Clang.setVirtualFileSystem(&*VFS);
-#else
-       Clang.createFileManager(&*VFS);
-#endif
-
-#if CLANG_VERSION_MAJOR < 4
-       IntrusiveRefCntPtr<CompilerInvocation> CI =
-               createCompilerInvocation(std::move(CFlags), Path,
-                                        Clang.getDiagnostics());
-       Clang.setInvocation(&*CI);
-#else
-       std::shared_ptr<CompilerInvocation> CI(
-               createCompilerInvocation(std::move(CFlags), Path,
-                                        Clang.getDiagnostics()));
-       Clang.setInvocation(CI);
-#endif
-
-       std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx));
-       if (!Clang.ExecuteAction(*Act))
-               return std::unique_ptr<llvm::Module>(nullptr);
-
-       return Act->takeModule();
-}
-
-std::unique_ptr<llvm::Module>
-getModuleFromSource(llvm::opt::ArgStringList CFlags,
-                   StringRef Name, StringRef Content)
-{
-       using namespace vfs;
-
-       llvm::IntrusiveRefCntPtr<OverlayFileSystem> OverlayFS(
-                       new OverlayFileSystem(getRealFileSystem()));
-       llvm::IntrusiveRefCntPtr<InMemoryFileSystem> MemFS(
-                       new InMemoryFileSystem(true));
-
-       /*
-        * pushOverlay helps setting working dir for MemFS. Must call
-        * before addFile.
-        */
-       OverlayFS->pushOverlay(MemFS);
-       MemFS->addFile(Twine(Name), 0, llvm::MemoryBuffer::getMemBuffer(Content));
-
-       return getModuleFromSource(std::move(CFlags), Name, OverlayFS);
-}
-
-std::unique_ptr<llvm::Module>
-getModuleFromSource(llvm::opt::ArgStringList CFlags, StringRef Path)
-{
-       IntrusiveRefCntPtr<vfs::FileSystem> VFS(vfs::getRealFileSystem());
-       return getModuleFromSource(std::move(CFlags), Path, VFS);
-}
-
-std::unique_ptr<llvm::SmallVectorImpl<char>>
-getBPFObjectFromModule(llvm::Module *Module)
-{
-       using namespace llvm;
-
-       std::string TargetTriple("bpf-pc-linux");
-       std::string Error;
-       const Target* Target = TargetRegistry::lookupTarget(TargetTriple, Error);
-       if (!Target) {
-               llvm::errs() << Error;
-               return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
-       }
-
-       llvm::TargetOptions Opt;
-       TargetMachine *TargetMachine =
-               Target->createTargetMachine(TargetTriple,
-                                           "generic", "",
-                                           Opt, Reloc::Static);
-
-       Module->setDataLayout(TargetMachine->createDataLayout());
-       Module->setTargetTriple(TargetTriple);
-
-       std::unique_ptr<SmallVectorImpl<char>> Buffer(new SmallVector<char, 0>());
-       raw_svector_ostream ostream(*Buffer);
-
-       legacy::PassManager PM;
-       bool NotAdded;
-       NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream
-#if CLANG_VERSION_MAJOR >= 7
-                                                      , /*DwoOut=*/nullptr
-#endif
-#if CLANG_VERSION_MAJOR < 10
-                                                      , TargetMachine::CGFT_ObjectFile
-#else
-                                                      , llvm::CGFT_ObjectFile
-#endif
-                                                      );
-       if (NotAdded) {
-               llvm::errs() << "TargetMachine can't emit a file of this type\n";
-               return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
-       }
-       PM.run(*Module);
-
-       return Buffer;
-}
-
-}
-
-extern "C" {
-void perf_clang__init(void)
-{
-       perf::LLVMCtx.reset(new llvm::LLVMContext());
-       LLVMInitializeBPFTargetInfo();
-       LLVMInitializeBPFTarget();
-       LLVMInitializeBPFTargetMC();
-       LLVMInitializeBPFAsmPrinter();
-}
-
-void perf_clang__cleanup(void)
-{
-       perf::LLVMCtx.reset(nullptr);
-       llvm::llvm_shutdown();
-}
-
-int perf_clang__compile_bpf(const char *filename,
-                           void **p_obj_buf,
-                           size_t *p_obj_buf_sz)
-{
-       using namespace perf;
-
-       if (!p_obj_buf || !p_obj_buf_sz)
-               return -EINVAL;
-
-       llvm::opt::ArgStringList CFlags;
-       auto M = getModuleFromSource(std::move(CFlags), filename);
-       if (!M)
-               return  -EINVAL;
-       auto O = getBPFObjectFromModule(&*M);
-       if (!O)
-               return -EINVAL;
-
-       size_t size = O->size_in_bytes();
-       void *buffer;
-
-       buffer = malloc(size);
-       if (!buffer)
-               return -ENOMEM;
-       memcpy(buffer, O->data(), size);
-       *p_obj_buf = buffer;
-       *p_obj_buf_sz = size;
-       return 0;
-}
-}
diff --git a/tools/perf/util/c++/clang.h b/tools/perf/util/c++/clang.h
deleted file mode 100644 (file)
index 6ce33e2..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PERF_UTIL_CLANG_H
-#define PERF_UTIL_CLANG_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Option/Option.h"
-#include <memory>
-
-namespace perf {
-
-using namespace llvm;
-
-std::unique_ptr<Module>
-getModuleFromSource(opt::ArgStringList CFlags,
-                   StringRef Name, StringRef Content);
-
-std::unique_ptr<Module>
-getModuleFromSource(opt::ArgStringList CFlags,
-                   StringRef Path);
-
-std::unique_ptr<llvm::SmallVectorImpl<char>>
-getBPFObjectFromModule(llvm::Module *Module);
-
-}
-#endif
index 46f144c..7a650de 100644 (file)
@@ -16,7 +16,6 @@
 #include <subcmd/exec-cmd.h>
 #include "util/event.h"  /* proc_map_timeout */
 #include "util/hist.h"  /* perf_hist_config */
-#include "util/llvm-utils.h"   /* perf_llvm_config */
 #include "util/stat.h"  /* perf_stat__set_big_num */
 #include "util/evsel.h"  /* evsel__hw_names, evsel__use_bpf_counters */
 #include "util/srcline.h"  /* addr2line_timeout_ms */
@@ -486,9 +485,6 @@ int perf_default_config(const char *var, const char *value,
        if (strstarts(var, "call-graph."))
                return perf_callchain_config(var, value);
 
-       if (strstarts(var, "llvm."))
-               return perf_llvm_config(var, value);
-
        if (strstarts(var, "buildid."))
                return perf_buildid_config(var, value);
 
index 1419b40..9729d00 100644 (file)
@@ -6,10 +6,11 @@
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
  */
 
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/coresight-pmu.h>
 #include <linux/err.h>
-#include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/types.h>
 #include <linux/zalloc.h>
@@ -282,17 +283,6 @@ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
 }
 
 /*
- * FIELD_GET (linux/bitfield.h) not available outside kernel code,
- * and the header contains too many dependencies to just copy over,
- * so roll our own based on the original
- */
-#define __bf_shf(x) (__builtin_ffsll(x) - 1)
-#define FIELD_GET(_mask, _reg)                                         \
-       ({                                                              \
-               (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
-       })
-
-/*
  * Get a metadata for a specific cpu from an array.
  *
  */
index 46f74b2..1dbf278 100644 (file)
@@ -10,6 +10,8 @@
 #include <subcmd/exec-cmd.h>
 #include <linux/zalloc.h>
 #include <linux/build_bug.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
 
 #include "debug.h"
 #include "event.h"
@@ -63,6 +65,7 @@ static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al)
        d_al->addr = al->addr;
        d_al->comm = NULL;
        d_al->filtered = 0;
+       d_al->priv = NULL;
 }
 
 static struct addr_location *get_al(struct dlfilter *d)
@@ -151,6 +154,11 @@ static char **dlfilter__args(void *ctx, int *dlargc)
        return d->dlargv;
 }
 
+static bool has_priv(struct perf_dlfilter_al *d_al_p)
+{
+       return d_al_p->size >= offsetof(struct perf_dlfilter_al, priv) + sizeof(d_al_p->priv);
+}
+
 static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlfilter_al *d_al_p)
 {
        struct dlfilter *d = (struct dlfilter *)ctx;
@@ -166,6 +174,7 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf
        if (!thread)
                return -1;
 
+       addr_location__init(&al);
        thread__find_symbol_fb(thread, d->sample->cpumode, address, &al);
 
        al_to_d_al(&al, &d_al);
@@ -176,9 +185,31 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf
        memcpy(d_al_p, &d_al, min((size_t)sz, sizeof(d_al)));
        d_al_p->size = sz;
 
+       if (has_priv(d_al_p))
+               d_al_p->priv = memdup(&al, sizeof(al));
+       else /* Avoid leak for v0 API */
+               addr_location__exit(&al);
+
        return 0;
 }
 
+static void dlfilter__al_cleanup(void *ctx __maybe_unused, struct perf_dlfilter_al *d_al_p)
+{
+       struct addr_location *al;
+
+       /* Ensure backward compatibility */
+       if (!has_priv(d_al_p) || !d_al_p->priv)
+               return;
+
+       al = d_al_p->priv;
+
+       d_al_p->priv = NULL;
+
+       addr_location__exit(al);
+
+       free(al);
+}
+
 static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
 {
        struct dlfilter *d = (struct dlfilter *)ctx;
@@ -296,6 +327,7 @@ static const struct perf_dlfilter_fns perf_dlfilter_fns = {
        .resolve_addr    = dlfilter__resolve_addr,
        .args            = dlfilter__args,
        .resolve_address = dlfilter__resolve_address,
+       .al_cleanup      = dlfilter__al_cleanup,
        .insn            = dlfilter__insn,
        .srcline         = dlfilter__srcline,
        .attr            = dlfilter__attr,
index 9eabf3e..a164164 100644 (file)
@@ -324,11 +324,9 @@ int perf_env__read_pmu_mappings(struct perf_env *env)
        u32 pmu_num = 0;
        struct strbuf sb;
 
-       while ((pmu = perf_pmus__scan(pmu))) {
-               if (!pmu->name)
-                       continue;
+       while ((pmu = perf_pmus__scan(pmu)))
                pmu_num++;
-       }
+
        if (!pmu_num) {
                pr_debug("pmu mappings not available\n");
                return -ENOENT;
@@ -339,8 +337,6 @@ int perf_env__read_pmu_mappings(struct perf_env *env)
                return -ENOMEM;
 
        while ((pmu = perf_pmus__scan(pmu))) {
-               if (!pmu->name)
-                       continue;
                if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0)
                        goto error;
                /* include a NULL character at the end */
index 4cbb092..923c0fb 100644 (file)
@@ -93,8 +93,8 @@ struct process_symbol_args {
        u64        start;
 };
 
-static int find_symbol_cb(void *arg, const char *name, char type,
-                         u64 start)
+static int find_func_symbol_cb(void *arg, const char *name, char type,
+                              u64 start)
 {
        struct process_symbol_args *args = arg;
 
@@ -110,12 +110,36 @@ static int find_symbol_cb(void *arg, const char *name, char type,
        return 1;
 }
 
+static int find_any_symbol_cb(void *arg, const char *name,
+                             char type __maybe_unused, u64 start)
+{
+       struct process_symbol_args *args = arg;
+
+       if (strcmp(name, args->name))
+               return 0;
+
+       args->start = start;
+       return 1;
+}
+
 int kallsyms__get_function_start(const char *kallsyms_filename,
                                 const char *symbol_name, u64 *addr)
 {
        struct process_symbol_args args = { .name = symbol_name, };
 
-       if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0)
+       if (kallsyms__parse(kallsyms_filename, &args, find_func_symbol_cb) <= 0)
+               return -1;
+
+       *addr = args.start;
+       return 0;
+}
+
+int kallsyms__get_symbol_start(const char *kallsyms_filename,
+                              const char *symbol_name, u64 *addr)
+{
+       struct process_symbol_args args = { .name = symbol_name, };
+
+       if (kallsyms__parse(kallsyms_filename, &args, find_any_symbol_cb) <= 0)
                return -1;
 
        *addr = args.start;
index de20e01..d8bcee2 100644 (file)
@@ -360,6 +360,8 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL
 
 int kallsyms__get_function_start(const char *kallsyms_filename,
                                 const char *symbol_name, u64 *addr);
+int kallsyms__get_symbol_start(const char *kallsyms_filename,
+                              const char *symbol_name, u64 *addr);
 
 void event_attr_init(struct perf_event_attr *attr);
 
index 762e2b2..a8a5ff8 100644 (file)
@@ -845,6 +845,7 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
 {
        bool function = evsel__is_function_event(evsel);
        struct perf_event_attr *attr = &evsel->core.attr;
+       const char *arch = perf_env__arch(evsel__env(evsel));
 
        evsel__set_sample_bit(evsel, CALLCHAIN);
 
@@ -877,8 +878,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
                if (!function) {
                        evsel__set_sample_bit(evsel, REGS_USER);
                        evsel__set_sample_bit(evsel, STACK_USER);
-                       if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
-                               attr->sample_regs_user |= DWARF_MINIMAL_REGS;
+                       if (opts->sample_user_regs &&
+                           DWARF_MINIMAL_REGS(arch) != arch__user_reg_mask()) {
+                               attr->sample_regs_user |= DWARF_MINIMAL_REGS(arch);
                                pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
                                           "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
                                           "so the minimal registers set (IP, SP) is explicitly forced.\n");
@@ -1474,6 +1476,7 @@ void evsel__exit(struct evsel *evsel)
        perf_thread_map__put(evsel->core.threads);
        zfree(&evsel->group_name);
        zfree(&evsel->name);
+       zfree(&evsel->filter);
        zfree(&evsel->pmu_name);
        zfree(&evsel->group_pmu_name);
        zfree(&evsel->unit);
@@ -2826,9 +2829,6 @@ u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *n
 {
        struct tep_format_field *field = evsel__field(evsel, name);
 
-       if (!field)
-               return 0;
-
        return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
 }
 #endif
index 4814262..4488f30 100644 (file)
 #include "debug.h"
 #include "evlist.h"
 #include "expr.h"
-#include "expr-bison.h"
-#include "expr-flex.h"
+#include <util/expr-bison.h>
+#include <util/expr-flex.h>
 #include "util/hashmap.h"
+#include "util/header.h"
+#include "util/pmu.h"
 #include "smt.h"
 #include "tsc.h"
 #include <api/fs/fs.h>
@@ -425,6 +427,13 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx
                result = cpu__max_present_cpu().cpu;
                goto out;
        }
+       if (!strcmp("#num_cpus_online", literal)) {
+               struct perf_cpu_map *online = cpu_map__online();
+
+               if (online)
+                       result = perf_cpu_map__nr(online);
+               goto out;
+       }
 
        if (!strcasecmp("#system_tsc_freq", literal)) {
                result = arch_get_tsc_freq();
@@ -495,3 +504,19 @@ double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const
        evlist__delete(tmp);
        return ret;
 }
+
+double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused,
+                      bool compute_ids __maybe_unused, const char *test_id)
+{
+       double ret;
+       struct perf_pmu *pmu = pmu__find_core_pmu();
+       char *cpuid = perf_pmu__getcpuid(pmu);
+
+       if (!cpuid)
+               return NAN;
+
+       ret = !strcmp_cpuid_str(test_id, cpuid);
+
+       free(cpuid);
+       return ret;
+}
index 3c1e49b..c0cec29 100644 (file)
@@ -55,5 +55,6 @@ double expr_id_data__value(const struct expr_id_data *data);
 double expr_id_data__source_count(const struct expr_id_data *data);
 double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx);
 double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id);
+double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id);
 
 #endif
index dbb1174..0feef07 100644 (file)
@@ -114,6 +114,7 @@ if          { return IF; }
 else           { return ELSE; }
 source_count   { return SOURCE_COUNT; }
 has_event      { return HAS_EVENT; }
+strcmp_cpuid_str       { return STRCMP_CPUID_STR; }
 {literal}      { return literal(yyscanner, sctx); }
 {number}       { return value(yyscanner); }
 {symbol}       { return str(yyscanner, ID, sctx->runtime); }
index dd504af..6c93b35 100644 (file)
@@ -7,6 +7,8 @@
 #include "util/debug.h"
 #define IN_EXPR_Y 1
 #include "expr.h"
+#include "expr-bison.h"
+int expr_lex(YYSTYPE * yylval_param , void *yyscanner);
 %}
 
 %define api.pure full
@@ -37,7 +39,7 @@
        } ids;
 }
 
-%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT HAS_EVENT EXPR_ERROR
+%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT HAS_EVENT STRCMP_CPUID_STR EXPR_ERROR
 %left MIN MAX IF
 %left '|'
 %left '^'
@@ -56,7 +58,7 @@
 static void expr_error(double *final_val __maybe_unused,
                       struct expr_parse_ctx *ctx __maybe_unused,
                       bool compute_ids __maybe_unused,
-                      void *scanner,
+                      void *scanner __maybe_unused,
                       const char *s)
 {
        pr_debug("%s\n", s);
@@ -205,6 +207,12 @@ expr: NUMBER
        $$.ids = NULL;
        free($3);
 }
+| STRCMP_CPUID_STR '(' ID ')'
+{
+       $$.val = expr__strcmp_cpuid_str(ctx, compute_ids, $3);
+       $$.ids = NULL;
+       free($3);
+}
 | expr '|' expr
 {
        if (is_const($1.val) && is_const($3.val)) {
index 52fbf52..d812e1e 100644 (file)
@@ -456,6 +456,8 @@ static int write_cpudesc(struct feat_fd *ff,
 #define CPUINFO_PROC   { "Processor", }
 #elif defined(__xtensa__)
 #define CPUINFO_PROC   { "core ID", }
+#elif defined(__loongarch__)
+#define CPUINFO_PROC   { "Model Name", }
 #else
 #define CPUINFO_PROC   { "model name", }
 #endif
@@ -746,20 +748,14 @@ static int write_pmu_mappings(struct feat_fd *ff,
         * Do a first pass to count number of pmu to avoid lseek so this
         * works in pipe mode as well.
         */
-       while ((pmu = perf_pmus__scan(pmu))) {
-               if (!pmu->name)
-                       continue;
+       while ((pmu = perf_pmus__scan(pmu)))
                pmu_num++;
-       }
 
        ret = do_write(ff, &pmu_num, sizeof(pmu_num));
        if (ret < 0)
                return ret;
 
        while ((pmu = perf_pmus__scan(pmu))) {
-               if (!pmu->name)
-                       continue;
-
                ret = do_write(ff, &pmu->type, sizeof(pmu->type));
                if (ret < 0)
                        return ret;
@@ -1605,8 +1601,15 @@ static int write_pmu_caps(struct feat_fd *ff,
        int ret;
 
        while ((pmu = perf_pmus__scan(pmu))) {
-               if (!pmu->name || !strcmp(pmu->name, "cpu") ||
-                   perf_pmu__caps_parse(pmu) <= 0)
+               if (!strcmp(pmu->name, "cpu")) {
+                       /*
+                        * The "cpu" PMU is special and covered by
+                        * HEADER_CPU_PMU_CAPS. Note, core PMUs are
+                        * counted/written here for ARM, s390 and Intel hybrid.
+                        */
+                       continue;
+               }
+               if (perf_pmu__caps_parse(pmu) <= 0)
                        continue;
                nr_pmu++;
        }
@@ -1619,23 +1622,17 @@ static int write_pmu_caps(struct feat_fd *ff,
                return 0;
 
        /*
-        * Write hybrid pmu caps first to maintain compatibility with
-        * older perf tool.
+        * Note older perf tools assume core PMUs come first, this is a property
+        * of perf_pmus__scan.
         */
-       if (perf_pmus__num_core_pmus() > 1) {
-               pmu = NULL;
-               while ((pmu = perf_pmus__scan_core(pmu))) {
-                       ret = __write_pmu_caps(ff, pmu, true);
-                       if (ret < 0)
-                               return ret;
-               }
-       }
-
        pmu = NULL;
        while ((pmu = perf_pmus__scan(pmu))) {
-               if (pmu->is_core || !pmu->nr_caps)
+               if (!strcmp(pmu->name, "cpu")) {
+                       /* Skip as above. */
+                       continue;
+               }
+               if (perf_pmu__caps_parse(pmu) <= 0)
                        continue;
-
                ret = __write_pmu_caps(ff, pmu, true);
                if (ret < 0)
                        return ret;
@@ -4381,7 +4378,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
                             union perf_event *event,
                             struct evlist **pevlist)
 {
-       u32 i, ids, n_ids;
+       u32 i, n_ids;
+       u64 *ids;
        struct evsel *evsel;
        struct evlist *evlist = *pevlist;
 
@@ -4397,9 +4395,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
 
        evlist__add(evlist, evsel);
 
-       ids = event->header.size;
-       ids -= (void *)&event->attr.id - (void *)event;
-       n_ids = ids / sizeof(u64);
+       n_ids = event->header.size - sizeof(event->header) - event->attr.attr.size;
+       n_ids = n_ids / sizeof(u64);
        /*
         * We don't have the cpu and thread maps on the header, so
         * for allocating the perf_sample_id table we fake 1 cpu and
@@ -4408,8 +4405,9 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
        if (perf_evsel__alloc_id(&evsel->core, 1, n_ids))
                return -ENOMEM;
 
+       ids = perf_record_header_attr_id(event);
        for (i = 0; i < n_ids; i++) {
-               perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]);
+               perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, ids[i]);
        }
 
        return 0;
index 014d821..37ecef0 100644 (file)
@@ -18,8 +18,6 @@
  * defined before including "unwind.h"
  */
 #define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum)
-#define LIBUNWIND__ARCH_REG_IP PERF_REG_ARM64_PC
-#define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP
 
 #include "unwind.h"
 #include "libunwind-aarch64.h"
index b2b92d0..1697dec 100644 (file)
@@ -18,8 +18,6 @@
  * defined before including "unwind.h"
  */
 #define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)
-#define LIBUNWIND__ARCH_REG_IP PERF_REG_X86_IP
-#define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP
 
 #include "unwind.h"
 #include "libunwind-x86.h"
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
deleted file mode 100644 (file)
index c6c9c22..0000000
+++ /dev/null
@@ -1,612 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2015, Huawei Inc.
- */
-
-#include <errno.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <linux/err.h>
-#include <linux/string.h>
-#include <linux/zalloc.h>
-#include "debug.h"
-#include "llvm-utils.h"
-#include "config.h"
-#include "util.h"
-#include <sys/wait.h>
-#include <subcmd/exec-cmd.h>
-
-#define CLANG_BPF_CMD_DEFAULT_TEMPLATE                         \
-               "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
-               "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE "     \
-               "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
-               "-Wno-unused-value -Wno-pointer-sign "          \
-               "-working-directory $WORKING_DIR "              \
-               "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -g -O2 -o - $LLVM_OPTIONS_PIPE"
-
-struct llvm_param llvm_param = {
-       .clang_path = "clang",
-       .llc_path = "llc",
-       .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE,
-       .clang_opt = NULL,
-       .opts = NULL,
-       .kbuild_dir = NULL,
-       .kbuild_opts = NULL,
-       .user_set_param = false,
-};
-
-static void version_notice(void);
-
-int perf_llvm_config(const char *var, const char *value)
-{
-       if (!strstarts(var, "llvm."))
-               return 0;
-       var += sizeof("llvm.") - 1;
-
-       if (!strcmp(var, "clang-path"))
-               llvm_param.clang_path = strdup(value);
-       else if (!strcmp(var, "clang-bpf-cmd-template"))
-               llvm_param.clang_bpf_cmd_template = strdup(value);
-       else if (!strcmp(var, "clang-opt"))
-               llvm_param.clang_opt = strdup(value);
-       else if (!strcmp(var, "kbuild-dir"))
-               llvm_param.kbuild_dir = strdup(value);
-       else if (!strcmp(var, "kbuild-opts"))
-               llvm_param.kbuild_opts = strdup(value);
-       else if (!strcmp(var, "dump-obj"))
-               llvm_param.dump_obj = !!perf_config_bool(var, value);
-       else if (!strcmp(var, "opts"))
-               llvm_param.opts = strdup(value);
-       else {
-               pr_debug("Invalid LLVM config option: %s\n", value);
-               return -1;
-       }
-       llvm_param.user_set_param = true;
-       return 0;
-}
-
-static int
-search_program(const char *def, const char *name,
-              char *output)
-{
-       char *env, *path, *tmp = NULL;
-       char buf[PATH_MAX];
-       int ret;
-
-       output[0] = '\0';
-       if (def && def[0] != '\0') {
-               if (def[0] == '/') {
-                       if (access(def, F_OK) == 0) {
-                               strlcpy(output, def, PATH_MAX);
-                               return 0;
-                       }
-               } else if (def[0] != '\0')
-                       name = def;
-       }
-
-       env = getenv("PATH");
-       if (!env)
-               return -1;
-       env = strdup(env);
-       if (!env)
-               return -1;
-
-       ret = -ENOENT;
-       path = strtok_r(env, ":",  &tmp);
-       while (path) {
-               scnprintf(buf, sizeof(buf), "%s/%s", path, name);
-               if (access(buf, F_OK) == 0) {
-                       strlcpy(output, buf, PATH_MAX);
-                       ret = 0;
-                       break;
-               }
-               path = strtok_r(NULL, ":", &tmp);
-       }
-
-       free(env);
-       return ret;
-}
-
-static int search_program_and_warn(const char *def, const char *name,
-                                  char *output)
-{
-       int ret = search_program(def, name, output);
-
-       if (ret) {
-               pr_err("ERROR:\tunable to find %s.\n"
-                      "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
-                      "     \tand '%s-path' option in [llvm] section of ~/.perfconfig.\n",
-                      name, name);
-               version_notice();
-       }
-       return ret;
-}
-
-#define READ_SIZE      4096
-static int
-read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
-{
-       int err = 0;
-       void *buf = NULL;
-       FILE *file = NULL;
-       size_t read_sz = 0, buf_sz = 0;
-       char serr[STRERR_BUFSIZE];
-
-       file = popen(cmd, "r");
-       if (!file) {
-               pr_err("ERROR: unable to popen cmd: %s\n",
-                      str_error_r(errno, serr, sizeof(serr)));
-               return -EINVAL;
-       }
-
-       while (!feof(file) && !ferror(file)) {
-               /*
-                * Make buf_sz always have obe byte extra space so we
-                * can put '\0' there.
-                */
-               if (buf_sz - read_sz < READ_SIZE + 1) {
-                       void *new_buf;
-
-                       buf_sz = read_sz + READ_SIZE + 1;
-                       new_buf = realloc(buf, buf_sz);
-
-                       if (!new_buf) {
-                               pr_err("ERROR: failed to realloc memory\n");
-                               err = -ENOMEM;
-                               goto errout;
-                       }
-
-                       buf = new_buf;
-               }
-               read_sz += fread(buf + read_sz, 1, READ_SIZE, file);
-       }
-
-       if (buf_sz - read_sz < 1) {
-               pr_err("ERROR: internal error\n");
-               err = -EINVAL;
-               goto errout;
-       }
-
-       if (ferror(file)) {
-               pr_err("ERROR: error occurred when reading from pipe: %s\n",
-                      str_error_r(errno, serr, sizeof(serr)));
-               err = -EIO;
-               goto errout;
-       }
-
-       err = WEXITSTATUS(pclose(file));
-       file = NULL;
-       if (err) {
-               err = -EINVAL;
-               goto errout;
-       }
-
-       /*
-        * If buf is string, give it terminal '\0' to make our life
-        * easier. If buf is not string, that '\0' is out of space
-        * indicated by read_sz so caller won't even notice it.
-        */
-       ((char *)buf)[read_sz] = '\0';
-
-       if (!p_buf)
-               free(buf);
-       else
-               *p_buf = buf;
-
-       if (p_read_sz)
-               *p_read_sz = read_sz;
-       return 0;
-
-errout:
-       if (file)
-               pclose(file);
-       free(buf);
-       if (p_buf)
-               *p_buf = NULL;
-       if (p_read_sz)
-               *p_read_sz = 0;
-       return err;
-}
-
-static inline void
-force_set_env(const char *var, const char *value)
-{
-       if (value) {
-               setenv(var, value, 1);
-               pr_debug("set env: %s=%s\n", var, value);
-       } else {
-               unsetenv(var);
-               pr_debug("unset env: %s\n", var);
-       }
-}
-
-static void
-version_notice(void)
-{
-       pr_err(
-"     \tLLVM 3.7 or newer is required. Which can be found from http://llvm.org\n"
-"     \tYou may want to try git trunk:\n"
-"     \t\tgit clone http://llvm.org/git/llvm.git\n"
-"     \t\t     and\n"
-"     \t\tgit clone http://llvm.org/git/clang.git\n\n"
-"     \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n"
-"     \tdebian/ubuntu:\n"
-"     \t\thttps://apt.llvm.org/\n\n"
-"     \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
-"     \toption in [llvm] section of ~/.perfconfig to:\n\n"
-"     \t  \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
-"     \t     -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
-"     \t     -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
-"     \t(Replace /path/to/llc with path to your llc)\n\n"
-);
-}
-
-static int detect_kbuild_dir(char **kbuild_dir)
-{
-       const char *test_dir = llvm_param.kbuild_dir;
-       const char *prefix_dir = "";
-       const char *suffix_dir = "";
-
-       /* _UTSNAME_LENGTH is 65 */
-       char release[128];
-
-       char *autoconf_path;
-
-       int err;
-
-       if (!test_dir) {
-               err = fetch_kernel_version(NULL, release,
-                                          sizeof(release));
-               if (err)
-                       return -EINVAL;
-
-               test_dir = release;
-               prefix_dir = "/lib/modules/";
-               suffix_dir = "/build";
-       }
-
-       err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h",
-                      prefix_dir, test_dir, suffix_dir);
-       if (err < 0)
-               return -ENOMEM;
-
-       if (access(autoconf_path, R_OK) == 0) {
-               free(autoconf_path);
-
-               err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir,
-                              suffix_dir);
-               if (err < 0)
-                       return -ENOMEM;
-               return 0;
-       }
-       pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n",
-                __func__, autoconf_path);
-       free(autoconf_path);
-       return -ENOENT;
-}
-
-static const char *kinc_fetch_script =
-"#!/usr/bin/env sh\n"
-"if ! test -d \"$KBUILD_DIR\"\n"
-"then\n"
-"      exit 1\n"
-"fi\n"
-"if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
-"then\n"
-"      exit 1\n"
-"fi\n"
-"TMPDIR=`mktemp -d`\n"
-"if test -z \"$TMPDIR\"\n"
-"then\n"
-"    exit 1\n"
-"fi\n"
-"cat << EOF > $TMPDIR/Makefile\n"
-"obj-y := dummy.o\n"
-"\\$(obj)/%.o: \\$(src)/%.c\n"
-"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n"
-"\t\\$(CC) -c -o \\$@ \\$<\n"
-"EOF\n"
-"touch $TMPDIR/dummy.c\n"
-"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n"
-"RET=$?\n"
-"rm -rf $TMPDIR\n"
-"exit $RET\n";
-
-void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts)
-{
-       static char *saved_kbuild_dir;
-       static char *saved_kbuild_include_opts;
-       int err;
-
-       if (!kbuild_dir || !kbuild_include_opts)
-               return;
-
-       *kbuild_dir = NULL;
-       *kbuild_include_opts = NULL;
-
-       if (saved_kbuild_dir && saved_kbuild_include_opts &&
-           !IS_ERR(saved_kbuild_dir) && !IS_ERR(saved_kbuild_include_opts)) {
-               *kbuild_dir = strdup(saved_kbuild_dir);
-               *kbuild_include_opts = strdup(saved_kbuild_include_opts);
-
-               if (*kbuild_dir && *kbuild_include_opts)
-                       return;
-
-               zfree(kbuild_dir);
-               zfree(kbuild_include_opts);
-               /*
-                * Don't fall through: it may breaks saved_kbuild_dir and
-                * saved_kbuild_include_opts if detect them again when
-                * memory is low.
-                */
-               return;
-       }
-
-       if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) {
-               pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n");
-               pr_debug("Skip kbuild options detection.\n");
-               goto errout;
-       }
-
-       err = detect_kbuild_dir(kbuild_dir);
-       if (err) {
-               pr_warning(
-"WARNING:\tunable to get correct kernel building directory.\n"
-"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n"
-"     \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n"
-"     \tdetection.\n\n");
-               goto errout;
-       }
-
-       pr_debug("Kernel build dir is set to %s\n", *kbuild_dir);
-       force_set_env("KBUILD_DIR", *kbuild_dir);
-       force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts);
-       err = read_from_pipe(kinc_fetch_script,
-                            (void **)kbuild_include_opts,
-                            NULL);
-       if (err) {
-               pr_warning(
-"WARNING:\tunable to get kernel include directories from '%s'\n"
-"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n"
-"     \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n"
-"     \toption in [llvm] to \"\" to suppress this detection.\n\n",
-                       *kbuild_dir);
-
-               zfree(kbuild_dir);
-               goto errout;
-       }
-
-       pr_debug("include option is set to %s\n", *kbuild_include_opts);
-
-       saved_kbuild_dir = strdup(*kbuild_dir);
-       saved_kbuild_include_opts = strdup(*kbuild_include_opts);
-
-       if (!saved_kbuild_dir || !saved_kbuild_include_opts) {
-               zfree(&saved_kbuild_dir);
-               zfree(&saved_kbuild_include_opts);
-       }
-       return;
-errout:
-       saved_kbuild_dir = ERR_PTR(-EINVAL);
-       saved_kbuild_include_opts = ERR_PTR(-EINVAL);
-}
-
-int llvm__get_nr_cpus(void)
-{
-       static int nr_cpus_avail = 0;
-       char serr[STRERR_BUFSIZE];
-
-       if (nr_cpus_avail > 0)
-               return nr_cpus_avail;
-
-       nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF);
-       if (nr_cpus_avail <= 0) {
-               pr_err(
-"WARNING:\tunable to get available CPUs in this system: %s\n"
-"        \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr)));
-               nr_cpus_avail = 128;
-       }
-       return nr_cpus_avail;
-}
-
-void llvm__dump_obj(const char *path, void *obj_buf, size_t size)
-{
-       char *obj_path = strdup(path);
-       FILE *fp;
-       char *p;
-
-       if (!obj_path) {
-               pr_warning("WARNING: Not enough memory, skip object dumping\n");
-               return;
-       }
-
-       p = strrchr(obj_path, '.');
-       if (!p || (strcmp(p, ".c") != 0)) {
-               pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n",
-                          obj_path);
-               goto out;
-       }
-
-       p[1] = 'o';
-       fp = fopen(obj_path, "wb");
-       if (!fp) {
-               pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n",
-                          obj_path, strerror(errno));
-               goto out;
-       }
-
-       pr_debug("LLVM: dumping %s\n", obj_path);
-       if (fwrite(obj_buf, size, 1, fp) != 1)
-               pr_debug("WARNING: failed to write to file '%s': %s, skip object dumping\n", obj_path, strerror(errno));
-       fclose(fp);
-out:
-       free(obj_path);
-}
-
-int llvm__compile_bpf(const char *path, void **p_obj_buf,
-                     size_t *p_obj_buf_sz)
-{
-       size_t obj_buf_sz;
-       void *obj_buf = NULL;
-       int err, nr_cpus_avail;
-       unsigned int kernel_version;
-       char linux_version_code_str[64];
-       const char *clang_opt = llvm_param.clang_opt;
-       char clang_path[PATH_MAX], llc_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
-       char serr[STRERR_BUFSIZE];
-       char *kbuild_dir = NULL, *kbuild_include_opts = NULL,
-            *perf_bpf_include_opts = NULL;
-       const char *template = llvm_param.clang_bpf_cmd_template;
-       char *pipe_template = NULL;
-       const char *opts = llvm_param.opts;
-       char *command_echo = NULL, *command_out;
-       char *libbpf_include_dir = system_path(LIBBPF_INCLUDE_DIR);
-
-       if (path[0] != '-' && realpath(path, abspath) == NULL) {
-               err = errno;
-               pr_err("ERROR: problems with path %s: %s\n",
-                      path, str_error_r(err, serr, sizeof(serr)));
-               return -err;
-       }
-
-       if (!template)
-               template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
-
-       err = search_program_and_warn(llvm_param.clang_path,
-                            "clang", clang_path);
-       if (err)
-               return -ENOENT;
-
-       /*
-        * This is an optional work. Even it fail we can continue our
-        * work. Needn't check error return.
-        */
-       llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts);
-
-       nr_cpus_avail = llvm__get_nr_cpus();
-       snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d",
-                nr_cpus_avail);
-
-       if (fetch_kernel_version(&kernel_version, NULL, 0))
-               kernel_version = 0;
-
-       snprintf(linux_version_code_str, sizeof(linux_version_code_str),
-                "0x%x", kernel_version);
-       if (asprintf(&perf_bpf_include_opts, "-I%s/", libbpf_include_dir) < 0)
-               goto errout;
-       force_set_env("NR_CPUS", nr_cpus_avail_str);
-       force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
-       force_set_env("CLANG_EXEC", clang_path);
-       force_set_env("CLANG_OPTIONS", clang_opt);
-       force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
-       force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts);
-       force_set_env("WORKING_DIR", kbuild_dir ? : ".");
-
-       if (opts) {
-               err = search_program_and_warn(llvm_param.llc_path, "llc", llc_path);
-               if (err)
-                       goto errout;
-
-               err = -ENOMEM;
-               if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -",
-                             template, llc_path, opts) < 0) {
-                       pr_err("ERROR:\tnot enough memory to setup command line\n");
-                       goto errout;
-               }
-
-               template = pipe_template;
-
-       }
-
-       /*
-        * Since we may reset clang's working dir, path of source file
-        * should be transferred into absolute path, except we want
-        * stdin to be source file (testing).
-        */
-       force_set_env("CLANG_SOURCE",
-                     (path[0] == '-') ? path : abspath);
-
-       pr_debug("llvm compiling command template: %s\n", template);
-
-       /*
-        * Below, substitute control characters for values that can cause the
-        * echo to misbehave, then substitute the values back.
-        */
-       err = -ENOMEM;
-       if (asprintf(&command_echo, "echo -n \a%s\a", template) < 0)
-               goto errout;
-
-#define SWAP_CHAR(a, b) do { if (*p == a) *p = b; } while (0)
-       for (char *p = command_echo; *p; p++) {
-               SWAP_CHAR('<', '\001');
-               SWAP_CHAR('>', '\002');
-               SWAP_CHAR('"', '\003');
-               SWAP_CHAR('\'', '\004');
-               SWAP_CHAR('|', '\005');
-               SWAP_CHAR('&', '\006');
-               SWAP_CHAR('\a', '"');
-       }
-       err = read_from_pipe(command_echo, (void **) &command_out, NULL);
-       if (err)
-               goto errout;
-
-       for (char *p = command_out; *p; p++) {
-               SWAP_CHAR('\001', '<');
-               SWAP_CHAR('\002', '>');
-               SWAP_CHAR('\003', '"');
-               SWAP_CHAR('\004', '\'');
-               SWAP_CHAR('\005', '|');
-               SWAP_CHAR('\006', '&');
-       }
-#undef SWAP_CHAR
-       pr_debug("llvm compiling command : %s\n", command_out);
-
-       err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
-       if (err) {
-               pr_err("ERROR:\tunable to compile %s\n", path);
-               pr_err("Hint:\tCheck error message shown above.\n");
-               pr_err("Hint:\tYou can also pre-compile it into .o using:\n");
-               pr_err("     \t\tclang --target=bpf -O2 -c %s\n", path);
-               pr_err("     \twith proper -I and -D options.\n");
-               goto errout;
-       }
-
-       free(command_echo);
-       free(command_out);
-       free(kbuild_dir);
-       free(kbuild_include_opts);
-       free(perf_bpf_include_opts);
-       free(libbpf_include_dir);
-
-       if (!p_obj_buf)
-               free(obj_buf);
-       else
-               *p_obj_buf = obj_buf;
-
-       if (p_obj_buf_sz)
-               *p_obj_buf_sz = obj_buf_sz;
-       return 0;
-errout:
-       free(command_echo);
-       free(kbuild_dir);
-       free(kbuild_include_opts);
-       free(obj_buf);
-       free(perf_bpf_include_opts);
-       free(libbpf_include_dir);
-       free(pipe_template);
-       if (p_obj_buf)
-               *p_obj_buf = NULL;
-       if (p_obj_buf_sz)
-               *p_obj_buf_sz = 0;
-       return err;
-}
-
-int llvm__search_clang(void)
-{
-       char clang_path[PATH_MAX];
-
-       return search_program_and_warn(llvm_param.clang_path, "clang", clang_path);
-}
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
deleted file mode 100644 (file)
index 7878a0e..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2015, Huawei Inc.
- */
-#ifndef __LLVM_UTILS_H
-#define __LLVM_UTILS_H
-
-#include <stdbool.h>
-
-struct llvm_param {
-       /* Path of clang executable */
-       const char *clang_path;
-       /* Path of llc executable */
-       const char *llc_path;
-       /*
-        * Template of clang bpf compiling. 5 env variables
-        * can be used:
-        *   $CLANG_EXEC:               Path to clang.
-        *   $CLANG_OPTIONS:            Extra options to clang.
-        *   $KERNEL_INC_OPTIONS:       Kernel include directories.
-        *   $WORKING_DIR:              Kernel source directory.
-        *   $CLANG_SOURCE:             Source file to be compiled.
-        */
-       const char *clang_bpf_cmd_template;
-       /* Will be filled in $CLANG_OPTIONS */
-       const char *clang_opt;
-       /*
-        * If present it'll add -emit-llvm to $CLANG_OPTIONS to pipe
-        * the clang output to llc, useful for new llvm options not
-        * yet selectable via 'clang -mllvm option', such as -mattr=dwarfris
-        * in clang 6.0/llvm 7
-        */
-       const char *opts;
-       /* Where to find kbuild system */
-       const char *kbuild_dir;
-       /*
-        * Arguments passed to make, like 'ARCH=arm' if doing cross
-        * compiling. Should not be used for dynamic compiling.
-        */
-       const char *kbuild_opts;
-       /*
-        * Default is false. If set to true, write compiling result
-        * to object file.
-        */
-       bool dump_obj;
-       /*
-        * Default is false. If one of the above fields is set by user
-        * explicitly then user_set_llvm is set to true. This is used
-        * for perf test. If user doesn't set anything in .perfconfig
-        * and clang is not found, don't trigger llvm test.
-        */
-       bool user_set_param;
-};
-
-extern struct llvm_param llvm_param;
-int perf_llvm_config(const char *var, const char *value);
-
-int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz);
-
-/* This function is for test__llvm() use only */
-int llvm__search_clang(void);
-
-/* Following functions are reused by builtin clang support */
-void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts);
-int llvm__get_nr_cpus(void);
-
-void llvm__dump_obj(const char *path, void *obj_buf, size_t size);
-#endif
index 51424cd..af9a976 100644 (file)
@@ -45,15 +45,13 @@ int lzma_decompress_to_file(const char *input, int output_fd)
 
        infile = fopen(input, "rb");
        if (!infile) {
-               pr_err("lzma: fopen failed on %s: '%s'\n",
-                      input, strerror(errno));
+               pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno));
                return -1;
        }
 
        ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
        if (ret != LZMA_OK) {
-               pr_err("lzma: lzma_stream_decoder failed %s (%d)\n",
-                       lzma_strerror(ret), ret);
+               pr_debug("lzma: lzma_stream_decoder failed %s (%d)\n", lzma_strerror(ret), ret);
                goto err_fclose;
        }
 
@@ -68,7 +66,7 @@ int lzma_decompress_to_file(const char *input, int output_fd)
                        strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile);
 
                        if (ferror(infile)) {
-                               pr_err("lzma: read error: %s\n", strerror(errno));
+                               pr_debug("lzma: read error: %s\n", strerror(errno));
                                goto err_lzma_end;
                        }
 
@@ -82,7 +80,7 @@ int lzma_decompress_to_file(const char *input, int output_fd)
                        ssize_t write_size = sizeof(buf_out) - strm.avail_out;
 
                        if (writen(output_fd, buf_out, write_size) != write_size) {
-                               pr_err("lzma: write error: %s\n", strerror(errno));
+                               pr_debug("lzma: write error: %s\n", strerror(errno));
                                goto err_lzma_end;
                        }
 
@@ -94,7 +92,7 @@ int lzma_decompress_to_file(const char *input, int output_fd)
                        if (ret == LZMA_STREAM_END)
                                break;
 
-                       pr_err("lzma: failed %s\n", lzma_strerror(ret));
+                       pr_debug("lzma: failed %s\n", lzma_strerror(ret));
                        goto err_lzma_end;
                }
        }
index f4cb41e..88f31b3 100644 (file)
@@ -1215,7 +1215,9 @@ static int machine__get_running_kernel_start(struct machine *machine,
 
        *start = addr;
 
-       err = kallsyms__get_function_start(filename, "_etext", &addr);
+       err = kallsyms__get_symbol_start(filename, "_edata", &addr);
+       if (err)
+               err = kallsyms__get_function_start(filename, "_etext", &addr);
        if (!err)
                *end = addr;
 
index c07fe3a..39ffe8c 100644 (file)
@@ -37,7 +37,7 @@ struct perf_mem_event * __weak perf_mem_events__ptr(int i)
        return &perf_mem_events[i];
 }
 
-char * __weak perf_mem_events__name(int i, char *pmu_name  __maybe_unused)
+const char * __weak perf_mem_events__name(int i, const char *pmu_name  __maybe_unused)
 {
        struct perf_mem_event *e = perf_mem_events__ptr(i);
 
@@ -53,7 +53,7 @@ char * __weak perf_mem_events__name(int i, char *pmu_name  __maybe_unused)
                return mem_loads_name;
        }
 
-       return (char *)e->name;
+       return e->name;
 }
 
 __weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
@@ -186,7 +186,6 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
        int i = *argv_nr, k = 0;
        struct perf_mem_event *e;
        struct perf_pmu *pmu;
-       char *s;
 
        for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
                e = perf_mem_events__ptr(j);
@@ -209,15 +208,16 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
                        }
 
                        while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+                               const char *s = perf_mem_events__name(j, pmu->name);
+
                                rec_argv[i++] = "-e";
-                               s = perf_mem_events__name(j, pmu->name);
                                if (s) {
-                                       s = strdup(s);
-                                       if (!s)
+                                       char *copy = strdup(s);
+                                       if (!copy)
                                                return -1;
 
-                                       rec_argv[i++] = s;
-                                       rec_tmp[k++] = s;
+                                       rec_argv[i++] = copy;
+                                       rec_tmp[k++] = copy;
                                }
                        }
                }
index 1237230..b40ad6e 100644 (file)
@@ -38,7 +38,7 @@ extern unsigned int perf_mem_events__loads_ldlat;
 int perf_mem_events__parse(const char *str);
 int perf_mem_events__init(void);
 
-char *perf_mem_events__name(int i, char *pmu_name);
+const char *perf_mem_events__name(int i, const char *pmu_name);
 struct perf_mem_event *perf_mem_events__ptr(int i);
 bool is_mem_loads_aux_event(struct evsel *leader);
 
index a6a5ed4..6231044 100644 (file)
@@ -527,7 +527,7 @@ void metricgroup__print(const struct print_callbacks *print_cb, void *print_stat
        groups.node_delete = mep_delete;
        table = pmu_metrics_table__find();
        if (table) {
-               pmu_metrics_table_for_each_metric(table,
+               pmu_metrics_table__for_each_metric(table,
                                                 metricgroup__add_to_mep_groups_callback,
                                                 &groups);
        }
@@ -1069,7 +1069,7 @@ static bool metricgroup__find_metric(const char *pmu,
                .pm = pm,
        };
 
-       return pmu_metrics_table_for_each_metric(table, metricgroup__find_metric_callback, &data)
+       return pmu_metrics_table__for_each_metric(table, metricgroup__find_metric_callback, &data)
                ? true : false;
 }
 
@@ -1255,7 +1255,7 @@ static int metricgroup__add_metric(const char *pmu, const char *metric_name, con
                 * Iterate over all metrics seeing if metric matches either the
                 * name or group. When it does add the metric to the list.
                 */
-               ret = pmu_metrics_table_for_each_metric(table, metricgroup__add_metric_callback,
+               ret = pmu_metrics_table__for_each_metric(table, metricgroup__add_metric_callback,
                                                       &data);
                if (ret)
                        goto out;
@@ -1740,7 +1740,7 @@ bool metricgroup__has_metric(const char *pmu, const char *metric)
        if (!table)
                return false;
 
-       return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, &data)
+       return pmu_metrics_table__for_each_metric(table, metricgroup__has_metric_callback, &data)
                ? true : false;
 }
 
@@ -1770,7 +1770,7 @@ unsigned int metricgroups__topdown_max_level(void)
        if (!table)
                return false;
 
-       pmu_metrics_table_for_each_metric(table, metricgroup__topdown_max_level_callback,
+       pmu_metrics_table__for_each_metric(table, metricgroup__topdown_max_level_callback,
                                          &max_level);
        return max_level;
 }
index c9ec0ca..65608a3 100644 (file)
 #include <subcmd/parse-options.h>
 #include "parse-events.h"
 #include "string2.h"
-#include "strlist.h"
-#include "bpf-loader.h"
+#include "strbuf.h"
 #include "debug.h"
 #include <api/fs/tracing_path.h>
 #include <perf/cpumap.h>
-#include "parse-events-bison.h"
-#include "parse-events-flex.h"
+#include <util/parse-events-bison.h>
+#include <util/parse-events-flex.h>
 #include "pmu.h"
 #include "pmus.h"
 #include "asm/bug.h"
@@ -35,7 +34,6 @@
 #ifdef PARSER_DEBUG
 extern int parse_events_debug;
 #endif
-int parse_events_parse(void *parse_state, void *scanner);
 static int get_config_terms(struct list_head *head_config,
                            struct list_head *head_terms __maybe_unused);
 
@@ -155,7 +153,7 @@ const char *event_type(int type)
        return "unknown";
 }
 
-static char *get_config_str(struct list_head *head_terms, int type_term)
+static char *get_config_str(struct list_head *head_terms, enum parse_events__term_type type_term)
 {
        struct parse_events_term *term;
 
@@ -195,38 +193,31 @@ static void fix_raw(struct list_head *config_terms, struct perf_pmu *pmu)
        struct parse_events_term *term;
 
        list_for_each_entry(term, config_terms, list) {
-               struct perf_pmu_alias *alias;
-               bool matched = false;
+               u64 num;
 
                if (term->type_term != PARSE_EVENTS__TERM_TYPE_RAW)
                        continue;
 
-               list_for_each_entry(alias, &pmu->aliases, list) {
-                       if (!strcmp(alias->name, term->val.str)) {
-                               free(term->config);
-                               term->config = term->val.str;
-                               term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
-                               term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
-                               term->val.num = 1;
-                               term->no_value = true;
-                               matched = true;
-                               break;
-                       }
-               }
-               if (!matched) {
-                       u64 num;
-
-                       free(term->config);
-                       term->config = strdup("config");
-                       errno = 0;
-                       num = strtoull(term->val.str + 1, NULL, 16);
-                       assert(errno == 0);
-                       free(term->val.str);
+               if (perf_pmu__have_event(pmu, term->val.str)) {
+                       zfree(&term->config);
+                       term->config = term->val.str;
                        term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
-                       term->type_term = PARSE_EVENTS__TERM_TYPE_CONFIG;
-                       term->val.num = num;
-                       term->no_value = false;
+                       term->type_term = PARSE_EVENTS__TERM_TYPE_USER;
+                       term->val.num = 1;
+                       term->no_value = true;
+                       continue;
                }
+
+               zfree(&term->config);
+               term->config = strdup("config");
+               errno = 0;
+               num = strtoull(term->val.str + 1, NULL, 16);
+               assert(errno == 0);
+               free(term->val.str);
+               term->type_val = PARSE_EVENTS__TERM_TYPE_NUM;
+               term->type_term = PARSE_EVENTS__TERM_TYPE_CONFIG;
+               term->val.num = num;
+               term->no_value = false;
        }
 }
 
@@ -271,7 +262,7 @@ __add_event(struct list_head *list, int *idx,
        evsel->core.is_pmu_core = pmu ? pmu->is_core : false;
        evsel->auto_merge_stats = auto_merge_stats;
        evsel->pmu = pmu;
-       evsel->pmu_name = pmu && pmu->name ? strdup(pmu->name) : NULL;
+       evsel->pmu_name = pmu ? strdup(pmu->name) : NULL;
 
        if (name)
                evsel->name = strdup(name);
@@ -446,9 +437,6 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
        if (parse_state->pmu_filter == NULL)
                return false;
 
-       if (pmu->name == NULL)
-               return true;
-
        return strcmp(parse_state->pmu_filter, pmu->name) != 0;
 }
 
@@ -499,7 +487,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
 
 #ifdef HAVE_LIBTRACEEVENT
 static void tracepoint_error(struct parse_events_error *e, int err,
-                            const char *sys, const char *name)
+                            const char *sys, const char *name, int column)
 {
        const char *str;
        char help[BUFSIZ];
@@ -526,18 +514,19 @@ static void tracepoint_error(struct parse_events_error *e, int err,
        }
 
        tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name);
-       parse_events_error__handle(e, 0, strdup(str), strdup(help));
+       parse_events_error__handle(e, column, strdup(str), strdup(help));
 }
 
 static int add_tracepoint(struct list_head *list, int *idx,
                          const char *sys_name, const char *evt_name,
                          struct parse_events_error *err,
-                         struct list_head *head_config)
+                         struct list_head *head_config, void *loc_)
 {
+       YYLTYPE *loc = loc_;
        struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++);
 
        if (IS_ERR(evsel)) {
-               tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name);
+               tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name, loc->first_column);
                return PTR_ERR(evsel);
        }
 
@@ -556,7 +545,7 @@ static int add_tracepoint(struct list_head *list, int *idx,
 static int add_tracepoint_multi_event(struct list_head *list, int *idx,
                                      const char *sys_name, const char *evt_name,
                                      struct parse_events_error *err,
-                                     struct list_head *head_config)
+                                     struct list_head *head_config, YYLTYPE *loc)
 {
        char *evt_path;
        struct dirent *evt_ent;
@@ -565,13 +554,13 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 
        evt_path = get_events_file(sys_name);
        if (!evt_path) {
-               tracepoint_error(err, errno, sys_name, evt_name);
+               tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
                return -1;
        }
        evt_dir = opendir(evt_path);
        if (!evt_dir) {
                put_events_file(evt_path);
-               tracepoint_error(err, errno, sys_name, evt_name);
+               tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
                return -1;
        }
 
@@ -588,11 +577,11 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
                found++;
 
                ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name,
-                                    err, head_config);
+                                    err, head_config, loc);
        }
 
        if (!found) {
-               tracepoint_error(err, ENOENT, sys_name, evt_name);
+               tracepoint_error(err, ENOENT, sys_name, evt_name, loc->first_column);
                ret = -1;
        }
 
@@ -604,19 +593,19 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 static int add_tracepoint_event(struct list_head *list, int *idx,
                                const char *sys_name, const char *evt_name,
                                struct parse_events_error *err,
-                               struct list_head *head_config)
+                               struct list_head *head_config, YYLTYPE *loc)
 {
        return strpbrk(evt_name, "*?") ?
-              add_tracepoint_multi_event(list, idx, sys_name, evt_name,
-                                         err, head_config) :
-              add_tracepoint(list, idx, sys_name, evt_name,
-                             err, head_config);
+               add_tracepoint_multi_event(list, idx, sys_name, evt_name,
+                                          err, head_config, loc) :
+               add_tracepoint(list, idx, sys_name, evt_name,
+                              err, head_config, loc);
 }
 
 static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
                                    const char *sys_name, const char *evt_name,
                                    struct parse_events_error *err,
-                                   struct list_head *head_config)
+                                   struct list_head *head_config, YYLTYPE *loc)
 {
        struct dirent *events_ent;
        DIR *events_dir;
@@ -624,7 +613,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 
        events_dir = tracing_events__opendir();
        if (!events_dir) {
-               tracepoint_error(err, errno, sys_name, evt_name);
+               tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
                return -1;
        }
 
@@ -640,7 +629,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
                        continue;
 
                ret = add_tracepoint_event(list, idx, events_ent->d_name,
-                                          evt_name, err, head_config);
+                                          evt_name, err, head_config, loc);
        }
 
        closedir(events_dir);
@@ -648,264 +637,6 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 }
 #endif /* HAVE_LIBTRACEEVENT */
 
-#ifdef HAVE_LIBBPF_SUPPORT
-struct __add_bpf_event_param {
-       struct parse_events_state *parse_state;
-       struct list_head *list;
-       struct list_head *head_config;
-};
-
-static int add_bpf_event(const char *group, const char *event, int fd, struct bpf_object *obj,
-                        void *_param)
-{
-       LIST_HEAD(new_evsels);
-       struct __add_bpf_event_param *param = _param;
-       struct parse_events_state *parse_state = param->parse_state;
-       struct list_head *list = param->list;
-       struct evsel *pos;
-       int err;
-       /*
-        * Check if we should add the event, i.e. if it is a TP but starts with a '!',
-        * then don't add the tracepoint, this will be used for something else, like
-        * adding to a BPF_MAP_TYPE_PROG_ARRAY.
-        *
-        * See tools/perf/examples/bpf/augmented_raw_syscalls.c
-        */
-       if (group[0] == '!')
-               return 0;
-
-       pr_debug("add bpf event %s:%s and attach bpf program %d\n",
-                group, event, fd);
-
-       err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group,
-                                         event, parse_state->error,
-                                         param->head_config);
-       if (err) {
-               struct evsel *evsel, *tmp;
-
-               pr_debug("Failed to add BPF event %s:%s\n",
-                        group, event);
-               list_for_each_entry_safe(evsel, tmp, &new_evsels, core.node) {
-                       list_del_init(&evsel->core.node);
-                       evsel__delete(evsel);
-               }
-               return err;
-       }
-       pr_debug("adding %s:%s\n", group, event);
-
-       list_for_each_entry(pos, &new_evsels, core.node) {
-               pr_debug("adding %s:%s to %p\n",
-                        group, event, pos);
-               pos->bpf_fd = fd;
-               pos->bpf_obj = obj;
-       }
-       list_splice(&new_evsels, list);
-       return 0;
-}
-
-int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
-                             struct list_head *list,
-                             struct bpf_object *obj,
-                             struct list_head *head_config)
-{
-       int err;
-       char errbuf[BUFSIZ];
-       struct __add_bpf_event_param param = {parse_state, list, head_config};
-       static bool registered_unprobe_atexit = false;
-
-       if (IS_ERR(obj) || !obj) {
-               snprintf(errbuf, sizeof(errbuf),
-                        "Internal error: load bpf obj with NULL");
-               err = -EINVAL;
-               goto errout;
-       }
-
-       /*
-        * Register atexit handler before calling bpf__probe() so
-        * bpf__probe() don't need to unprobe probe points its already
-        * created when failure.
-        */
-       if (!registered_unprobe_atexit) {
-               atexit(bpf__clear);
-               registered_unprobe_atexit = true;
-       }
-
-       err = bpf__probe(obj);
-       if (err) {
-               bpf__strerror_probe(obj, err, errbuf, sizeof(errbuf));
-               goto errout;
-       }
-
-       err = bpf__load(obj);
-       if (err) {
-               bpf__strerror_load(obj, err, errbuf, sizeof(errbuf));
-               goto errout;
-       }
-
-       err = bpf__foreach_event(obj, add_bpf_event, &param);
-       if (err) {
-               snprintf(errbuf, sizeof(errbuf),
-                        "Attach events in BPF object failed");
-               goto errout;
-       }
-
-       return 0;
-errout:
-       parse_events_error__handle(parse_state->error, 0,
-                               strdup(errbuf), strdup("(add -v to see detail)"));
-       return err;
-}
-
-static int
-parse_events_config_bpf(struct parse_events_state *parse_state,
-                       struct bpf_object *obj,
-                       struct list_head *head_config)
-{
-       struct parse_events_term *term;
-       int error_pos;
-
-       if (!head_config || list_empty(head_config))
-               return 0;
-
-       list_for_each_entry(term, head_config, list) {
-               int err;
-
-               if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
-                       parse_events_error__handle(parse_state->error, term->err_term,
-                                               strdup("Invalid config term for BPF object"),
-                                               NULL);
-                       return -EINVAL;
-               }
-
-               err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos);
-               if (err) {
-                       char errbuf[BUFSIZ];
-                       int idx;
-
-                       bpf__strerror_config_obj(obj, term, parse_state->evlist,
-                                                &error_pos, err, errbuf,
-                                                sizeof(errbuf));
-
-                       if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
-                               idx = term->err_val;
-                       else
-                               idx = term->err_term + error_pos;
-
-                       parse_events_error__handle(parse_state->error, idx,
-                                               strdup(errbuf),
-                                               strdup(
-"Hint:\tValid config terms:\n"
-"     \tmap:[<arraymap>].value<indices>=[value]\n"
-"     \tmap:[<eventmap>].event<indices>=[event]\n"
-"\n"
-"     \twhere <indices> is something like [0,3...5] or [all]\n"
-"     \t(add -v to see detail)"));
-                       return err;
-               }
-       }
-       return 0;
-}
-
-/*
- * Split config terms:
- * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ...
- *  'call-graph=fp' is 'evt config', should be applied to each
- *  events in bpf.c.
- * 'map:array.value[0]=1' is 'obj config', should be processed
- * with parse_events_config_bpf.
- *
- * Move object config terms from the first list to obj_head_config.
- */
-static void
-split_bpf_config_terms(struct list_head *evt_head_config,
-                      struct list_head *obj_head_config)
-{
-       struct parse_events_term *term, *temp;
-
-       /*
-        * Currently, all possible user config term
-        * belong to bpf object. parse_events__is_hardcoded_term()
-        * happens to be a good flag.
-        *
-        * See parse_events_config_bpf() and
-        * config_term_tracepoint().
-        */
-       list_for_each_entry_safe(term, temp, evt_head_config, list)
-               if (!parse_events__is_hardcoded_term(term))
-                       list_move_tail(&term->list, obj_head_config);
-}
-
-int parse_events_load_bpf(struct parse_events_state *parse_state,
-                         struct list_head *list,
-                         char *bpf_file_name,
-                         bool source,
-                         struct list_head *head_config)
-{
-       int err;
-       struct bpf_object *obj;
-       LIST_HEAD(obj_head_config);
-
-       if (head_config)
-               split_bpf_config_terms(head_config, &obj_head_config);
-
-       obj = bpf__prepare_load(bpf_file_name, source);
-       if (IS_ERR(obj)) {
-               char errbuf[BUFSIZ];
-
-               err = PTR_ERR(obj);
-
-               if (err == -ENOTSUP)
-                       snprintf(errbuf, sizeof(errbuf),
-                                "BPF support is not compiled");
-               else
-                       bpf__strerror_prepare_load(bpf_file_name,
-                                                  source,
-                                                  -err, errbuf,
-                                                  sizeof(errbuf));
-
-               parse_events_error__handle(parse_state->error, 0,
-                                       strdup(errbuf), strdup("(add -v to see detail)"));
-               return err;
-       }
-
-       err = parse_events_load_bpf_obj(parse_state, list, obj, head_config);
-       if (err)
-               return err;
-       err = parse_events_config_bpf(parse_state, obj, &obj_head_config);
-
-       /*
-        * Caller doesn't know anything about obj_head_config,
-        * so combine them together again before returning.
-        */
-       if (head_config)
-               list_splice_tail(&obj_head_config, head_config);
-       return err;
-}
-#else // HAVE_LIBBPF_SUPPORT
-int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
-                             struct list_head *list __maybe_unused,
-                             struct bpf_object *obj __maybe_unused,
-                             struct list_head *head_config __maybe_unused)
-{
-       parse_events_error__handle(parse_state->error, 0,
-                                  strdup("BPF support is not compiled"),
-                                  strdup("Make sure libbpf-devel is available at build time."));
-       return -ENOTSUP;
-}
-
-int parse_events_load_bpf(struct parse_events_state *parse_state,
-                         struct list_head *list __maybe_unused,
-                         char *bpf_file_name __maybe_unused,
-                         bool source __maybe_unused,
-                         struct list_head *head_config __maybe_unused)
-{
-       parse_events_error__handle(parse_state->error, 0,
-                                  strdup("BPF support is not compiled"),
-                                  strdup("Make sure libbpf-devel is available at build time."));
-       return -ENOTSUP;
-}
-#endif // HAVE_LIBBPF_SUPPORT
-
 static int
 parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
 {
@@ -991,7 +722,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state,
 
 static int check_type_val(struct parse_events_term *term,
                          struct parse_events_error *err,
-                         int type)
+                         enum parse_events__term_val_type type)
 {
        if (type == term->type_val)
                return 0;
@@ -1006,42 +737,49 @@ static int check_type_val(struct parse_events_term *term,
        return -EINVAL;
 }
 
-/*
- * Update according to parse-events.l
- */
-static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
-       [PARSE_EVENTS__TERM_TYPE_USER]                  = "<sysfs term>",
-       [PARSE_EVENTS__TERM_TYPE_CONFIG]                = "config",
-       [PARSE_EVENTS__TERM_TYPE_CONFIG1]               = "config1",
-       [PARSE_EVENTS__TERM_TYPE_CONFIG2]               = "config2",
-       [PARSE_EVENTS__TERM_TYPE_CONFIG3]               = "config3",
-       [PARSE_EVENTS__TERM_TYPE_NAME]                  = "name",
-       [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD]         = "period",
-       [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ]           = "freq",
-       [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE]    = "branch_type",
-       [PARSE_EVENTS__TERM_TYPE_TIME]                  = "time",
-       [PARSE_EVENTS__TERM_TYPE_CALLGRAPH]             = "call-graph",
-       [PARSE_EVENTS__TERM_TYPE_STACKSIZE]             = "stack-size",
-       [PARSE_EVENTS__TERM_TYPE_NOINHERIT]             = "no-inherit",
-       [PARSE_EVENTS__TERM_TYPE_INHERIT]               = "inherit",
-       [PARSE_EVENTS__TERM_TYPE_MAX_STACK]             = "max-stack",
-       [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS]            = "nr",
-       [PARSE_EVENTS__TERM_TYPE_OVERWRITE]             = "overwrite",
-       [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]           = "no-overwrite",
-       [PARSE_EVENTS__TERM_TYPE_DRV_CFG]               = "driver-config",
-       [PARSE_EVENTS__TERM_TYPE_PERCORE]               = "percore",
-       [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT]            = "aux-output",
-       [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE]       = "aux-sample-size",
-       [PARSE_EVENTS__TERM_TYPE_METRIC_ID]             = "metric-id",
-       [PARSE_EVENTS__TERM_TYPE_RAW]                   = "raw",
-       [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE]          = "legacy-cache",
-       [PARSE_EVENTS__TERM_TYPE_HARDWARE]              = "hardware",
-};
-
 static bool config_term_shrinked;
 
+static const char *config_term_name(enum parse_events__term_type term_type)
+{
+       /*
+        * Update according to parse-events.l
+        */
+       static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+               [PARSE_EVENTS__TERM_TYPE_USER]                  = "<sysfs term>",
+               [PARSE_EVENTS__TERM_TYPE_CONFIG]                = "config",
+               [PARSE_EVENTS__TERM_TYPE_CONFIG1]               = "config1",
+               [PARSE_EVENTS__TERM_TYPE_CONFIG2]               = "config2",
+               [PARSE_EVENTS__TERM_TYPE_CONFIG3]               = "config3",
+               [PARSE_EVENTS__TERM_TYPE_NAME]                  = "name",
+               [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD]         = "period",
+               [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ]           = "freq",
+               [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE]    = "branch_type",
+               [PARSE_EVENTS__TERM_TYPE_TIME]                  = "time",
+               [PARSE_EVENTS__TERM_TYPE_CALLGRAPH]             = "call-graph",
+               [PARSE_EVENTS__TERM_TYPE_STACKSIZE]             = "stack-size",
+               [PARSE_EVENTS__TERM_TYPE_NOINHERIT]             = "no-inherit",
+               [PARSE_EVENTS__TERM_TYPE_INHERIT]               = "inherit",
+               [PARSE_EVENTS__TERM_TYPE_MAX_STACK]             = "max-stack",
+               [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS]            = "nr",
+               [PARSE_EVENTS__TERM_TYPE_OVERWRITE]             = "overwrite",
+               [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]           = "no-overwrite",
+               [PARSE_EVENTS__TERM_TYPE_DRV_CFG]               = "driver-config",
+               [PARSE_EVENTS__TERM_TYPE_PERCORE]               = "percore",
+               [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT]            = "aux-output",
+               [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE]       = "aux-sample-size",
+               [PARSE_EVENTS__TERM_TYPE_METRIC_ID]             = "metric-id",
+               [PARSE_EVENTS__TERM_TYPE_RAW]                   = "raw",
+               [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE]          = "legacy-cache",
+               [PARSE_EVENTS__TERM_TYPE_HARDWARE]              = "hardware",
+       };
+       if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
+               return "unknown term";
+
+       return config_term_names[term_type];
+}
+
 static bool
-config_term_avail(int term_type, struct parse_events_error *err)
+config_term_avail(enum parse_events__term_type term_type, struct parse_events_error *err)
 {
        char *err_str;
 
@@ -1063,13 +801,31 @@ config_term_avail(int term_type, struct parse_events_error *err)
        case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
        case PARSE_EVENTS__TERM_TYPE_PERCORE:
                return true;
+       case PARSE_EVENTS__TERM_TYPE_USER:
+       case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+       case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+       case PARSE_EVENTS__TERM_TYPE_TIME:
+       case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+       case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+       case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+       case PARSE_EVENTS__TERM_TYPE_INHERIT:
+       case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+       case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+       case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+       case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+       case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+       case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+       case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+       case PARSE_EVENTS__TERM_TYPE_RAW:
+       case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+       case PARSE_EVENTS__TERM_TYPE_HARDWARE:
        default:
                if (!err)
                        return false;
 
                /* term_type is validated so indexing is safe */
                if (asprintf(&err_str, "'%s' is not usable in 'perf stat'",
-                               config_term_names[term_type]) >= 0)
+                            config_term_name(term_type)) >= 0)
                        parse_events_error__handle(err, -1, err_str, NULL);
                return false;
        }
@@ -1187,10 +943,14 @@ do {                                                                        \
                        return -EINVAL;
                }
                break;
+       case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+       case PARSE_EVENTS__TERM_TYPE_USER:
+       case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+       case PARSE_EVENTS__TERM_TYPE_HARDWARE:
        default:
                parse_events_error__handle(err, term->err_term,
-                               strdup("unknown term"),
-                               parse_events_formats_error_string(NULL));
+                                       strdup(config_term_name(term->type_term)),
+                                       parse_events_formats_error_string(NULL));
                return -EINVAL;
        }
 
@@ -1276,10 +1036,26 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
        case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
        case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
                return config_term_common(attr, term, err);
+       case PARSE_EVENTS__TERM_TYPE_USER:
+       case PARSE_EVENTS__TERM_TYPE_CONFIG:
+       case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+       case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+       case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+       case PARSE_EVENTS__TERM_TYPE_NAME:
+       case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+       case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+       case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+       case PARSE_EVENTS__TERM_TYPE_TIME:
+       case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+       case PARSE_EVENTS__TERM_TYPE_PERCORE:
+       case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+       case PARSE_EVENTS__TERM_TYPE_RAW:
+       case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+       case PARSE_EVENTS__TERM_TYPE_HARDWARE:
        default:
                if (err) {
                        parse_events_error__handle(err, term->err_term,
-                               strdup("unknown term"),
+                                                  strdup(config_term_name(term->type_term)),
                                strdup("valid terms: call-graph,stack-size\n"));
                }
                return -EINVAL;
@@ -1397,6 +1173,16 @@ do {                                                             \
                        ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
                                            term->val.num, term->weak);
                        break;
+               case PARSE_EVENTS__TERM_TYPE_USER:
+               case PARSE_EVENTS__TERM_TYPE_CONFIG:
+               case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+               case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+               case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+               case PARSE_EVENTS__TERM_TYPE_NAME:
+               case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+               case PARSE_EVENTS__TERM_TYPE_RAW:
+               case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+               case PARSE_EVENTS__TERM_TYPE_HARDWARE:
                default:
                        break;
                }
@@ -1418,14 +1204,38 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
        list_for_each_entry(term, head_config, list) {
                switch (term->type_term) {
                case PARSE_EVENTS__TERM_TYPE_USER:
-                       type = perf_pmu__format_type(&pmu->format, term->config);
+                       type = perf_pmu__format_type(pmu, term->config);
                        if (type != PERF_PMU_FORMAT_VALUE_CONFIG)
                                continue;
-                       bits |= perf_pmu__format_bits(&pmu->format, term->config);
+                       bits |= perf_pmu__format_bits(pmu, term->config);
                        break;
                case PARSE_EVENTS__TERM_TYPE_CONFIG:
                        bits = ~(u64)0;
                        break;
+               case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+               case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+               case PARSE_EVENTS__TERM_TYPE_CONFIG3:
+               case PARSE_EVENTS__TERM_TYPE_NAME:
+               case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+               case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+               case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+               case PARSE_EVENTS__TERM_TYPE_TIME:
+               case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+               case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+               case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+               case PARSE_EVENTS__TERM_TYPE_INHERIT:
+               case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+               case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+               case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+               case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+               case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+               case PARSE_EVENTS__TERM_TYPE_PERCORE:
+               case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+               case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
+               case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
+               case PARSE_EVENTS__TERM_TYPE_RAW:
+               case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
+               case PARSE_EVENTS__TERM_TYPE_HARDWARE:
                default:
                        break;
                }
@@ -1441,8 +1251,9 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
                                const char *sys, const char *event,
                                struct parse_events_error *err,
-                               struct list_head *head_config)
+                               struct list_head *head_config, void *loc_)
 {
+       YYLTYPE *loc = loc_;
 #ifdef HAVE_LIBTRACEEVENT
        if (head_config) {
                struct perf_event_attr attr;
@@ -1454,17 +1265,17 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
 
        if (strpbrk(sys, "*?"))
                return add_tracepoint_multi_sys(list, idx, sys, event,
-                                               err, head_config);
+                                               err, head_config, loc);
        else
                return add_tracepoint_event(list, idx, sys, event,
-                                           err, head_config);
+                                           err, head_config, loc);
 #else
        (void)list;
        (void)idx;
        (void)sys;
        (void)event;
        (void)head_config;
-       parse_events_error__handle(err, 0, strdup("unsupported tracepoint"),
+       parse_events_error__handle(err, loc->first_column, strdup("unsupported tracepoint"),
                                strdup("libtraceevent is necessary for tracepoint support"));
        return -1;
 #endif
@@ -1557,41 +1368,44 @@ static bool config_term_percore(struct list_head *config_terms)
 }
 
 int parse_events_add_pmu(struct parse_events_state *parse_state,
-                        struct list_head *list, char *name,
+                        struct list_head *list, const char *name,
                         struct list_head *head_config,
-                        bool auto_merge_stats)
+                        bool auto_merge_stats, void *loc_)
 {
        struct perf_event_attr attr;
        struct perf_pmu_info info;
        struct perf_pmu *pmu;
        struct evsel *evsel;
        struct parse_events_error *err = parse_state->error;
+       YYLTYPE *loc = loc_;
        LIST_HEAD(config_terms);
 
        pmu = parse_state->fake_pmu ?: perf_pmus__find(name);
 
-       if (verbose > 1 && !(pmu && pmu->selectable)) {
-               fprintf(stderr, "Attempting to add event pmu '%s' with '",
-                       name);
-               if (head_config) {
-                       struct parse_events_term *term;
-
-                       list_for_each_entry(term, head_config, list) {
-                               fprintf(stderr, "%s,", term->config);
-                       }
-               }
-               fprintf(stderr, "' that may result in non-fatal errors\n");
-       }
-
        if (!pmu) {
                char *err_str;
 
                if (asprintf(&err_str,
                                "Cannot find PMU `%s'. Missing kernel support?",
                                name) >= 0)
-                       parse_events_error__handle(err, 0, err_str, NULL);
+                       parse_events_error__handle(err, loc->first_column, err_str, NULL);
                return -EINVAL;
        }
+
+       if (verbose > 1) {
+               struct strbuf sb;
+
+               strbuf_init(&sb, /*hint=*/ 0);
+               if (pmu->selectable && !head_config) {
+                       strbuf_addf(&sb, "%s//", name);
+               } else {
+                       strbuf_addf(&sb, "%s/", name);
+                       parse_events_term__to_strbuf(head_config, &sb);
+                       strbuf_addch(&sb, '/');
+               }
+               fprintf(stderr, "Attempt to add: %s\n", sb.buf);
+               strbuf_release(&sb);
+       }
        if (head_config)
                fix_raw(head_config, pmu);
 
@@ -1612,20 +1426,16 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
                return evsel ? 0 : -ENOMEM;
        }
 
-       if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info))
+       if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info, err))
                return -EINVAL;
 
        if (verbose > 1) {
-               fprintf(stderr, "After aliases, add event pmu '%s' with '",
-                       name);
-               if (head_config) {
-                       struct parse_events_term *term;
+               struct strbuf sb;
 
-                       list_for_each_entry(term, head_config, list) {
-                               fprintf(stderr, "%s,", term->config);
-                       }
-               }
-               fprintf(stderr, "' that may result in non-fatal errors\n");
+               strbuf_init(&sb, /*hint=*/ 0);
+               parse_events_term__to_strbuf(head_config, &sb);
+               fprintf(stderr, "..after resolving event: %s/%s/\n", name, sb.buf);
+               strbuf_release(&sb);
        }
 
        /*
@@ -1675,14 +1485,15 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
                               char *str, struct list_head *head,
-                              struct list_head **listp)
+                              struct list_head **listp, void *loc_)
 {
        struct parse_events_term *term;
        struct list_head *list = NULL;
        struct list_head *orig_head = NULL;
        struct perf_pmu *pmu = NULL;
+       YYLTYPE *loc = loc_;
        int ok = 0;
-       char *config;
+       const char *config;
 
        *listp = NULL;
 
@@ -1699,9 +1510,9 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 
        if (parse_events_term__num(&term,
                                   PARSE_EVENTS__TERM_TYPE_USER,
-                                  config, 1, false, NULL,
-                                       NULL) < 0) {
-               free(config);
+                                  config, /*num=*/1, /*novalue=*/true,
+                                  loc, /*loc_val=*/NULL) < 0) {
+               zfree(&config);
                goto out_err;
        }
        list_add_tail(&term->list, head);
@@ -1714,33 +1525,38 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
        INIT_LIST_HEAD(list);
 
        while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               struct perf_pmu_alias *alias;
                bool auto_merge_stats;
 
                if (parse_events__filter_pmu(parse_state, pmu))
                        continue;
 
-               auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
+               if (!perf_pmu__have_event(pmu, str))
+                       continue;
 
-               list_for_each_entry(alias, &pmu->aliases, list) {
-                       if (!strcasecmp(alias->name, str)) {
-                               parse_events_copy_term_list(head, &orig_head);
-                               if (!parse_events_add_pmu(parse_state, list,
-                                                         pmu->name, orig_head,
-                                                         auto_merge_stats)) {
-                                       pr_debug("%s -> %s/%s/\n", str,
-                                                pmu->name, alias->str);
-                                       ok++;
-                               }
-                               parse_events_terms__delete(orig_head);
-                       }
+               auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
+               parse_events_copy_term_list(head, &orig_head);
+               if (!parse_events_add_pmu(parse_state, list, pmu->name,
+                                         orig_head, auto_merge_stats, loc)) {
+                       struct strbuf sb;
+
+                       strbuf_init(&sb, /*hint=*/ 0);
+                       parse_events_term__to_strbuf(orig_head, &sb);
+                       pr_debug("%s -> %s/%s/\n", str, pmu->name, sb.buf);
+                       strbuf_release(&sb);
+                       ok++;
                }
+               parse_events_terms__delete(orig_head);
        }
 
        if (parse_state->fake_pmu) {
                if (!parse_events_add_pmu(parse_state, list, str, head,
-                                         /*auto_merge_stats=*/true)) {
-                       pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str);
+                                         /*auto_merge_stats=*/true, loc)) {
+                       struct strbuf sb;
+
+                       strbuf_init(&sb, /*hint=*/ 0);
+                       parse_events_term__to_strbuf(head, &sb);
+                       pr_debug("%s -> %s/%s/\n", str, "fake_pmu", sb.buf);
+                       strbuf_release(&sb);
                        ok++;
                }
        }
@@ -1972,14 +1788,18 @@ int parse_events_name(struct list_head *list, const char *name)
        struct evsel *evsel;
 
        __evlist__for_each_entry(list, evsel) {
-               if (!evsel->name)
+               if (!evsel->name) {
                        evsel->name = strdup(name);
+                       if (!evsel->name)
+                               return -ENOMEM;
+               }
        }
 
        return 0;
 }
 
 static int parse_events__scanner(const char *str,
+                                FILE *input,
                                 struct parse_events_state *parse_state)
 {
        YY_BUFFER_STATE buffer;
@@ -1990,7 +1810,10 @@ static int parse_events__scanner(const char *str,
        if (ret)
                return ret;
 
-       buffer = parse_events__scan_string(str, scanner);
+       if (str)
+               buffer = parse_events__scan_string(str, scanner);
+       else
+               parse_events_set_in(input, scanner);
 
 #ifdef PARSER_DEBUG
        parse_events_debug = 1;
@@ -1998,8 +1821,10 @@ static int parse_events__scanner(const char *str,
 #endif
        ret = parse_events_parse(parse_state, scanner);
 
-       parse_events__flush_buffer(buffer, scanner);
-       parse_events__delete_buffer(buffer, scanner);
+       if (str) {
+               parse_events__flush_buffer(buffer, scanner);
+               parse_events__delete_buffer(buffer, scanner);
+       }
        parse_events_lex_destroy(scanner);
        return ret;
 }
@@ -2007,7 +1832,7 @@ static int parse_events__scanner(const char *str,
 /*
  * parse event config string, return a list of event terms.
  */
-int parse_events_terms(struct list_head *terms, const char *str)
+int parse_events_terms(struct list_head *terms, const char *str, FILE *input)
 {
        struct parse_events_state parse_state = {
                .terms  = NULL,
@@ -2015,7 +1840,7 @@ int parse_events_terms(struct list_head *terms, const char *str)
        };
        int ret;
 
-       ret = parse_events__scanner(str, &parse_state);
+       ret = parse_events__scanner(str, input, &parse_state);
 
        if (!ret) {
                list_splice(parse_state.terms, terms);
@@ -2259,7 +2084,6 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte
                .list     = LIST_HEAD_INIT(parse_state.list),
                .idx      = evlist->core.nr_entries,
                .error    = err,
-               .evlist   = evlist,
                .stoken   = PE_START_EVENTS,
                .fake_pmu = fake_pmu,
                .pmu_filter = pmu_filter,
@@ -2267,7 +2091,7 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte
        };
        int ret, ret2;
 
-       ret = parse_events__scanner(str, &parse_state);
+       ret = parse_events__scanner(str, /*input=*/ NULL, &parse_state);
 
        if (!ret && list_empty(&parse_state.list)) {
                WARN_ONCE(true, "WARNING: event parser found nothing\n");
@@ -2348,7 +2172,7 @@ void parse_events_error__handle(struct parse_events_error *err, int idx,
                break;
        default:
                pr_debug("Multiple errors dropping message: %s (%s)\n",
-                       err->str, err->help);
+                       err->str, err->help ?: "<no help>");
                free(err->str);
                err->str = str;
                free(err->help);
@@ -2641,7 +2465,8 @@ static int new_term(struct parse_events_term **_term,
 }
 
 int parse_events_term__num(struct parse_events_term **term,
-                          int type_term, char *config, u64 num,
+                          enum parse_events__term_type type_term,
+                          const char *config, u64 num,
                           bool no_value,
                           void *loc_term_, void *loc_val_)
 {
@@ -2651,17 +2476,18 @@ int parse_events_term__num(struct parse_events_term **term,
        struct parse_events_term temp = {
                .type_val  = PARSE_EVENTS__TERM_TYPE_NUM,
                .type_term = type_term,
-               .config    = config ? : strdup(config_term_names[type_term]),
+               .config    = config ? : strdup(config_term_name(type_term)),
                .no_value  = no_value,
                .err_term  = loc_term ? loc_term->first_column : 0,
                .err_val   = loc_val  ? loc_val->first_column  : 0,
        };
 
-       return new_term(term, &temp, NULL, num);
+       return new_term(term, &temp, /*str=*/NULL, num);
 }
 
 int parse_events_term__str(struct parse_events_term **term,
-                          int type_term, char *config, char *str,
+                          enum parse_events__term_type type_term,
+                          char *config, char *str,
                           void *loc_term_, void *loc_val_)
 {
        YYLTYPE *loc_term = loc_term_;
@@ -2675,15 +2501,16 @@ int parse_events_term__str(struct parse_events_term **term,
                .err_val   = loc_val  ? loc_val->first_column  : 0,
        };
 
-       return new_term(term, &temp, str, 0);
+       return new_term(term, &temp, str, /*num=*/0);
 }
 
 int parse_events_term__term(struct parse_events_term **term,
-                           int term_lhs, int term_rhs,
+                           enum parse_events__term_type term_lhs,
+                           enum parse_events__term_type term_rhs,
                            void *loc_term, void *loc_val)
 {
        return parse_events_term__str(term, term_lhs, NULL,
-                                     strdup(config_term_names[term_rhs]),
+                                     strdup(config_term_name(term_rhs)),
                                      loc_term, loc_val);
 }
 
@@ -2691,33 +2518,25 @@ int parse_events_term__clone(struct parse_events_term **new,
                             struct parse_events_term *term)
 {
        char *str;
-       struct parse_events_term temp = {
-               .type_val  = term->type_val,
-               .type_term = term->type_term,
-               .config    = NULL,
-               .err_term  = term->err_term,
-               .err_val   = term->err_val,
-       };
+       struct parse_events_term temp = *term;
 
+       temp.used = false;
        if (term->config) {
                temp.config = strdup(term->config);
                if (!temp.config)
                        return -ENOMEM;
        }
        if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
-               return new_term(new, &temp, NULL, term->val.num);
+               return new_term(new, &temp, /*str=*/NULL, term->val.num);
 
        str = strdup(term->val.str);
        if (!str)
                return -ENOMEM;
-       return new_term(new, &temp, str, 0);
+       return new_term(new, &temp, str, /*num=*/0);
 }
 
 void parse_events_term__delete(struct parse_events_term *term)
 {
-       if (term->array.nr_ranges)
-               zfree(&term->array.ranges);
-
        if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM)
                zfree(&term->val.str);
 
@@ -2768,9 +2587,47 @@ void parse_events_terms__delete(struct list_head *terms)
        free(terms);
 }
 
-void parse_events__clear_array(struct parse_events_array *a)
+int parse_events_term__to_strbuf(struct list_head *term_list, struct strbuf *sb)
 {
-       zfree(&a->ranges);
+       struct parse_events_term *term;
+       bool first = true;
+
+       if (!term_list)
+               return 0;
+
+       list_for_each_entry(term, term_list, list) {
+               int ret;
+
+               if (!first) {
+                       ret = strbuf_addch(sb, ',');
+                       if (ret < 0)
+                               return ret;
+               }
+               first = false;
+
+               if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+                       if (term->no_value) {
+                               assert(term->val.num == 1);
+                               ret = strbuf_addf(sb, "%s", term->config);
+                       } else
+                               ret = strbuf_addf(sb, "%s=%#"PRIx64, term->config, term->val.num);
+               else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+                       if (term->config) {
+                               ret = strbuf_addf(sb, "%s=", term->config);
+                               if (ret < 0)
+                                       return ret;
+                       } else if ((unsigned int)term->type_term < __PARSE_EVENTS__TERM_TYPE_NR) {
+                               ret = strbuf_addf(sb, "%s=", config_term_name(term->type_term));
+                               if (ret < 0)
+                                       return ret;
+                       }
+                       assert(!term->no_value);
+                       ret = strbuf_addf(sb, "%s", term->val.str);
+               }
+               if (ret < 0)
+                       return ret;
+       }
+       return 0;
 }
 
 void parse_events_evlist_error(struct parse_events_state *parse_state,
@@ -2789,7 +2646,7 @@ static void config_terms_list(char *buf, size_t buf_sz)
 
        buf[0] = '\0';
        for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
-               const char *name = config_term_names[i];
+               const char *name = config_term_name(i);
 
                if (!config_term_avail(i, NULL))
                        continue;
index b0eb95f..594e5d2 100644 (file)
@@ -9,6 +9,7 @@
 #include <stdbool.h>
 #include <linux/types.h>
 #include <linux/perf_event.h>
+#include <stdio.h>
 #include <string.h>
 
 struct evsel;
@@ -17,6 +18,7 @@ struct parse_events_error;
 
 struct option;
 struct perf_pmu;
+struct strbuf;
 
 const char *event_type(int type);
 
@@ -42,16 +44,16 @@ static inline int parse_events(struct evlist *evlist, const char *str,
 
 int parse_event(struct evlist *evlist, const char *str);
 
-int parse_events_terms(struct list_head *terms, const char *str);
+int parse_events_terms(struct list_head *terms, const char *str, FILE *input);
 int parse_filter(const struct option *opt, const char *str, int unset);
 int exclude_perf(const struct option *opt, const char *arg, int unset);
 
-enum {
+enum parse_events__term_val_type {
        PARSE_EVENTS__TERM_TYPE_NUM,
        PARSE_EVENTS__TERM_TYPE_STR,
 };
 
-enum {
+enum parse_events__term_type {
        PARSE_EVENTS__TERM_TYPE_USER,
        PARSE_EVENTS__TERM_TYPE_CONFIG,
        PARSE_EVENTS__TERM_TYPE_CONFIG1,
@@ -78,36 +80,54 @@ enum {
        PARSE_EVENTS__TERM_TYPE_RAW,
        PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
        PARSE_EVENTS__TERM_TYPE_HARDWARE,
-       __PARSE_EVENTS__TERM_TYPE_NR,
-};
-
-struct parse_events_array {
-       size_t nr_ranges;
-       struct {
-               unsigned int start;
-               size_t length;
-       } *ranges;
+#define        __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1)
 };
 
 struct parse_events_term {
-       char *config;
-       struct parse_events_array array;
+       /** @list: The term list the term is a part of. */
+       struct list_head list;
+       /**
+        * @config: The left-hand side of a term assignment, so the term
+        * "event=8" would have the config be "event"
+        */
+       const char *config;
+       /**
+        * @val: The right-hand side of a term assignment that can either be a
+        * string or a number depending on type_val.
+        */
        union {
                char *str;
                u64  num;
        } val;
-       int type_val;
-       int type_term;
-       struct list_head list;
-       bool used;
-       bool no_value;
-
-       /* error string indexes for within parsed string */
+       /** @type_val: The union variable in val to be used for the term. */
+       enum parse_events__term_val_type type_val;
+       /**
+        * @type_term: A predefined term type or PARSE_EVENTS__TERM_TYPE_USER
+        * when not inbuilt.
+        */
+       enum parse_events__term_type type_term;
+       /**
+        * @err_term: The column index of the term from parsing, used during
+        * error output.
+        */
        int err_term;
+       /**
+        * @err_val: The column index of the val from parsing, used during error
+        * output.
+        */
        int err_val;
-
-       /* Coming from implicit alias */
+       /** @used: Was the term used during parameterized-eval. */
+       bool used;
+       /**
+        * @weak: A term from the sysfs or json encoding of an event that
+        * shouldn't override terms coming from the command line.
+        */
        bool weak;
+       /**
+        * @no_value: Is there no value. If a numeric term has no value then the
+        * value is assumed to be 1. An event name also has no value.
+        */
+       bool no_value;
 };
 
 struct parse_events_error {
@@ -121,17 +141,23 @@ struct parse_events_error {
 };
 
 struct parse_events_state {
+       /* The list parsed events are placed on. */
        struct list_head           list;
+       /* The updated index used by entries as they are added. */
        int                        idx;
+       /* Error information. */
        struct parse_events_error *error;
-       struct evlist             *evlist;
+       /* Holds returned terms for term parsing. */
        struct list_head          *terms;
+       /* Start token. */
        int                        stoken;
+       /* Special fake PMU marker for testing. */
        struct perf_pmu           *fake_pmu;
        /* If non-null, when wildcard matching only match the given PMU. */
        const char                *pmu_filter;
        /* Should PE_LEGACY_NAME tokens be generated for config terms? */
        bool                       match_legacy_cache_terms;
+       /* Were multiple PMUs scanned to find events? */
        bool                       wild_card_pmus;
 };
 
@@ -140,39 +166,31 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
 void parse_events__shrink_config_terms(void);
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
 int parse_events_term__num(struct parse_events_term **term,
-                          int type_term, char *config, u64 num,
+                          enum parse_events__term_type type_term,
+                          const char *config, u64 num,
                           bool novalue,
                           void *loc_term, void *loc_val);
 int parse_events_term__str(struct parse_events_term **term,
-                          int type_term, char *config, char *str,
+                          enum parse_events__term_type type_term,
+                          char *config, char *str,
                           void *loc_term, void *loc_val);
 int parse_events_term__term(struct parse_events_term **term,
-                           int term_lhs, int term_rhs,
+                           enum parse_events__term_type term_lhs,
+                           enum parse_events__term_type term_rhs,
                            void *loc_term, void *loc_val);
 int parse_events_term__clone(struct parse_events_term **new,
                             struct parse_events_term *term);
 void parse_events_term__delete(struct parse_events_term *term);
 void parse_events_terms__delete(struct list_head *terms);
 void parse_events_terms__purge(struct list_head *terms);
-void parse_events__clear_array(struct parse_events_array *a);
+int parse_events_term__to_strbuf(struct list_head *term_list, struct strbuf *sb);
 int parse_events__modifier_event(struct list_head *list, char *str, bool add);
 int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, const char *name);
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
                                const char *sys, const char *event,
                                struct parse_events_error *error,
-                               struct list_head *head_config);
-int parse_events_load_bpf(struct parse_events_state *parse_state,
-                         struct list_head *list,
-                         char *bpf_file_name,
-                         bool source,
-                         struct list_head *head_config);
-/* Provide this function for perf test */
-struct bpf_object;
-int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
-                             struct list_head *list,
-                             struct bpf_object *obj,
-                             struct list_head *head_config);
+                               struct list_head *head_config, void *loc);
 int parse_events_add_numeric(struct parse_events_state *parse_state,
                             struct list_head *list,
                             u32 type, u64 config,
@@ -190,9 +208,9 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state,
                                u64 addr, char *type, u64 len,
                                struct list_head *head_config);
 int parse_events_add_pmu(struct parse_events_state *parse_state,
-                        struct list_head *list, char *name,
+                        struct list_head *list, const char *name,
                         struct list_head *head_config,
-                        bool auto_merge_stats);
+                       bool auto_merge_stats, void *loc);
 
 struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
                                      const char *name, const char *metric_id,
@@ -201,7 +219,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
                               char *str,
                               struct list_head *head_config,
-                              struct list_head **listp);
+                              struct list_head **listp, void *loc);
 
 int parse_events_copy_term_list(struct list_head *old,
                                 struct list_head **new);
index 99335ec..4ef4b6f 100644 (file)
@@ -68,31 +68,6 @@ static int lc_str(yyscan_t scanner, const struct parse_events_state *state)
        return str(scanner, state->match_legacy_cache_terms ? PE_LEGACY_CACHE : PE_NAME);
 }
 
-static bool isbpf_suffix(char *text)
-{
-       int len = strlen(text);
-
-       if (len < 2)
-               return false;
-       if ((text[len - 1] == 'c' || text[len - 1] == 'o') &&
-           text[len - 2] == '.')
-               return true;
-       if (len > 4 && !strcmp(text + len - 4, ".obj"))
-               return true;
-       return false;
-}
-
-static bool isbpf(yyscan_t scanner)
-{
-       char *text = parse_events_get_text(scanner);
-       struct stat st;
-
-       if (!isbpf_suffix(text))
-               return false;
-
-       return stat(text, &st) == 0;
-}
-
 /*
  * This function is called when the parser gets two kind of input:
  *
@@ -141,7 +116,7 @@ static int tool(yyscan_t scanner, enum perf_tool_event event)
        return PE_VALUE_SYM_TOOL;
 }
 
-static int term(yyscan_t scanner, int type)
+static int term(yyscan_t scanner, enum parse_events__term_type type)
 {
        YYSTYPE *yylval = parse_events_get_lval(scanner);
 
@@ -175,13 +150,10 @@ do {                                                      \
 %x mem
 %s config
 %x event
-%x array
 
 group          [^,{}/]*[{][^}]*[}][^,{}/]*
 event_pmu      [^,{}/]+[/][^/]*[/][^,{}/]*
 event          [^,{}/]+
-bpf_object     [^,{}]+\.(o|bpf)[a-zA-Z0-9._]*
-bpf_source     [^,{}]+\.c[a-zA-Z0-9._]*
 
 num_dec                [0-9]+
 num_hex                0x[a-fA-F0-9]+
@@ -234,8 +206,6 @@ non_digit   [^0-9]
                }
 
 {event_pmu}    |
-{bpf_object}   |
-{bpf_source}   |
 {event}                {
                        BEGIN(INITIAL);
                        REWIND(1);
@@ -251,14 +221,6 @@ non_digit  [^0-9]
                }
 }
 
-<array>{
-"]"                    { BEGIN(config); return ']'; }
-{num_dec}              { return value(yyscanner, 10); }
-{num_hex}              { return value(yyscanner, 16); }
-,                      { return ','; }
-"\.\.\."               { return PE_ARRAY_RANGE; }
-}
-
 <config>{
        /*
         * Please update config_term_names when new static term is added.
@@ -302,8 +264,6 @@ r0x{num_raw_hex}    { return str(yyscanner, PE_RAW); }
 {lc_type}-{lc_op_result}       { return lc_str(yyscanner, _parse_state); }
 {lc_type}-{lc_op_result}-{lc_op_result}        { return lc_str(yyscanner, _parse_state); }
 {name_minus}           { return str(yyscanner, PE_NAME); }
-\[all\]                        { return PE_ARRAY_ALL; }
-"["                    { BEGIN(array); return '['; }
 @{drv_cfg_term}                { return drv_str(yyscanner, PE_DRV_CFG_TERM); }
 }
 
@@ -374,8 +334,6 @@ r{num_raw_hex}              { return str(yyscanner, PE_RAW); }
 {num_hex}              { return value(yyscanner, 16); }
 
 {modifier_event}       { return str(yyscanner, PE_MODIFIER_EVENT); }
-{bpf_object}           { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
-{bpf_source}           { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
 {name}                 { return str(yyscanner, PE_NAME); }
 {name_tag}             { return str(yyscanner, PE_NAME); }
 "/"                    { BEGIN(config); return '/'; }
index 9f28d4b..21bfe7e 100644 (file)
 #include "parse-events.h"
 #include "parse-events-bison.h"
 
+int parse_events_lex(YYSTYPE * yylval_param, YYLTYPE * yylloc_param , void *yyscanner);
 void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg);
 
-#define ABORT_ON(val) \
+#define PE_ABORT(val) \
 do { \
-       if (val) \
-               YYABORT; \
+       if (val == -ENOMEM) \
+               YYNOMEM; \
+       YYABORT; \
 } while (0)
 
 static struct list_head* alloc_list(void)
@@ -58,13 +60,10 @@ static void free_list_evsel(struct list_head* list_evsel)
 %token PE_VALUE_SYM_TOOL
 %token PE_EVENT_NAME
 %token PE_RAW PE_NAME
-%token PE_BPF_OBJECT PE_BPF_SOURCE
 %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH
 %token PE_LEGACY_CACHE
-%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
+%token PE_PREFIX_MEM
 %token PE_ERROR
-%token PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
-%token PE_ARRAY_ALL PE_ARRAY_RANGE
 %token PE_DRV_CFG_TERM
 %token PE_TERM_HW
 %type <num> PE_VALUE
@@ -75,13 +74,10 @@ static void free_list_evsel(struct list_head* list_evsel)
 %type <num> value_sym
 %type <str> PE_RAW
 %type <str> PE_NAME
-%type <str> PE_BPF_OBJECT
-%type <str> PE_BPF_SOURCE
 %type <str> PE_LEGACY_CACHE
 %type <str> PE_MODIFIER_EVENT
 %type <str> PE_MODIFIER_BP
 %type <str> PE_EVENT_NAME
-%type <str> PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
 %type <str> PE_DRV_CFG_TERM
 %type <str> name_or_raw name_or_legacy
 %destructor { free ($$); } <str>
@@ -98,7 +94,6 @@ static void free_list_evsel(struct list_head* list_evsel)
 %type <list_evsel> event_legacy_tracepoint
 %type <list_evsel> event_legacy_numeric
 %type <list_evsel> event_legacy_raw
-%type <list_evsel> event_bpf_file
 %type <list_evsel> event_def
 %type <list_evsel> event_mod
 %type <list_evsel> event_name
@@ -109,11 +104,6 @@ static void free_list_evsel(struct list_head* list_evsel)
 %type <list_evsel> groups
 %destructor { free_list_evsel ($$); } <list_evsel>
 %type <tracepoint_name> tracepoint_name
-%destructor { free ($$.sys); free ($$.event); } <tracepoint_name>
-%type <array> array
-%type <array> array_term
-%type <array> array_terms
-%destructor { free ($$.ranges); } <array>
 %type <hardware_term> PE_TERM_HW
 %destructor { free ($$.str); } <hardware_term>
 
@@ -128,7 +118,6 @@ static void free_list_evsel(struct list_head* list_evsel)
                char *sys;
                char *event;
        } tracepoint_name;
-       struct parse_events_array array;
        struct hardware_term {
                char *str;
                u64 num;
@@ -265,7 +254,7 @@ PE_EVENT_NAME event_def
        free($1);
        if (err) {
                free_list_evsel($2);
-               YYABORT;
+               YYNOMEM;
        }
        $$ = $2;
 }
@@ -278,47 +267,47 @@ event_def: event_pmu |
           event_legacy_mem sep_dc |
           event_legacy_tracepoint sep_dc |
           event_legacy_numeric sep_dc |
-          event_legacy_raw sep_dc |
-          event_bpf_file
+          event_legacy_raw sep_dc
 
 event_pmu:
 PE_NAME opt_pmu_config
 {
        struct parse_events_state *parse_state = _parse_state;
-       struct parse_events_error *error = parse_state->error;
        struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL;
        char *pattern = NULL;
 
-#define CLEANUP_YYABORT                                        \
+#define CLEANUP                                                \
        do {                                            \
                parse_events_terms__delete($2);         \
                parse_events_terms__delete(orig_terms); \
                free(list);                             \
                free($1);                               \
                free(pattern);                          \
-               YYABORT;                                \
        } while(0)
 
-       if (parse_events_copy_term_list($2, &orig_terms))
-               CLEANUP_YYABORT;
-
-       if (error)
-               error->idx = @1.first_column;
+       if (parse_events_copy_term_list($2, &orig_terms)) {
+               CLEANUP;
+               YYNOMEM;
+       }
 
        list = alloc_list();
-       if (!list)
-               CLEANUP_YYABORT;
+       if (!list) {
+               CLEANUP;
+               YYNOMEM;
+       }
        /* Attempt to add to list assuming $1 is a PMU name. */
-       if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) {
+       if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false, &@1)) {
                struct perf_pmu *pmu = NULL;
                int ok = 0;
 
                /* Failure to add, try wildcard expansion of $1 as a PMU name. */
-               if (asprintf(&pattern, "%s*", $1) < 0)
-                       CLEANUP_YYABORT;
+               if (asprintf(&pattern, "%s*", $1) < 0) {
+                       CLEANUP;
+                       YYNOMEM;
+               }
 
                while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-                       char *name = pmu->name;
+                       const char *name = pmu->name;
 
                        if (parse_events__filter_pmu(parse_state, pmu))
                                continue;
@@ -330,10 +319,12 @@ PE_NAME opt_pmu_config
                            !perf_pmu__match(pattern, pmu->alias_name, $1)) {
                                bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
 
-                               if (parse_events_copy_term_list(orig_terms, &terms))
-                                       CLEANUP_YYABORT;
+                               if (parse_events_copy_term_list(orig_terms, &terms)) {
+                                       CLEANUP;
+                                       YYNOMEM;
+                               }
                                if (!parse_events_add_pmu(parse_state, list, pmu->name, terms,
-                                                         auto_merge_stats)) {
+                                                         auto_merge_stats, &@1)) {
                                        ok++;
                                        parse_state->wild_card_pmus = true;
                                }
@@ -344,30 +335,26 @@ PE_NAME opt_pmu_config
                if (!ok) {
                        /* Failure to add, assume $1 is an event name. */
                        zfree(&list);
-                       ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list);
+                       ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list, &@1);
                        $2 = NULL;
                }
-               if (!ok)
-                       CLEANUP_YYABORT;
+               if (!ok) {
+                       struct parse_events_error *error = parse_state->error;
+                       char *help;
+
+                       if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", $1) < 0)
+                               help = NULL;
+                       parse_events_error__handle(error, @1.first_column,
+                                                  strdup("Bad event or PMU"),
+                                                  help);
+                       CLEANUP;
+                       YYABORT;
+               }
        }
-       parse_events_terms__delete($2);
-       parse_events_terms__delete(orig_terms);
-       free(pattern);
-       free($1);
-       $$ = list;
-#undef CLEANUP_YYABORT
-}
-|
-PE_KERNEL_PMU_EVENT sep_dc
-{
-       struct list_head *list;
-       int err;
-
-       err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list);
-       free($1);
-       if (err < 0)
-               YYABORT;
        $$ = list;
+       list = NULL;
+       CLEANUP;
+#undef CLEANUP
 }
 |
 PE_NAME sep_dc
@@ -375,61 +362,19 @@ PE_NAME sep_dc
        struct list_head *list;
        int err;
 
-       err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list);
-       free($1);
-       if (err < 0)
-               YYABORT;
-       $$ = list;
-}
-|
-PE_KERNEL_PMU_EVENT opt_pmu_config
-{
-       struct list_head *list;
-       int err;
-
-       /* frees $2 */
-       err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list);
-       free($1);
-       if (err < 0)
-               YYABORT;
-       $$ = list;
-}
-|
-PE_PMU_EVENT_FAKE sep_dc
-{
-       struct list_head *list;
-       int err;
-
-       list = alloc_list();
-       if (!list)
-               YYABORT;
-
-       err = parse_events_add_pmu(_parse_state, list, $1, /*head_config=*/NULL,
-                                  /*auto_merge_stats=*/false);
-       free($1);
+       err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1);
        if (err < 0) {
-               free(list);
-               YYABORT;
-       }
-       $$ = list;
-}
-|
-PE_PMU_EVENT_FAKE opt_pmu_config
-{
-       struct list_head *list;
-       int err;
-
-       list = alloc_list();
-       if (!list)
-               YYABORT;
+               struct parse_events_state *parse_state = _parse_state;
+               struct parse_events_error *error = parse_state->error;
+               char *help;
 
-       err = parse_events_add_pmu(_parse_state, list, $1, $2, /*auto_merge_stats=*/false);
-       free($1);
-       parse_events_terms__delete($2);
-       if (err < 0) {
-               free(list);
-               YYABORT;
+               if (asprintf(&help, "Unable to find event on a PMU of '%s'", $1) < 0)
+                       help = NULL;
+               parse_events_error__handle(error, @1.first_column, strdup("Bad event name"), help);
+               free($1);
+               PE_ABORT(err);
        }
+       free($1);
        $$ = list;
 }
 
@@ -448,12 +393,13 @@ value_sym '/' event_config '/'
        bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard);
        parse_events_terms__delete($3);
        if (err) {
                free_list_evsel(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -464,21 +410,28 @@ value_sym sep_slash_slash_dc
        int type = $1 >> 16;
        int config = $1 & 255;
        bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
+       int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
-       ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config,
-                                         /*head_config=*/NULL, wildcard));
+       if (!list)
+               YYNOMEM;
+       err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard);
+       if (err)
+               PE_ABORT(err);
        $$ = list;
 }
 |
 PE_VALUE_SYM_TOOL sep_slash_slash_dc
 {
        struct list_head *list;
+       int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
-       ABORT_ON(parse_events_add_tool(_parse_state, list, $1));
+       if (!list)
+               YYNOMEM;
+       err = parse_events_add_tool(_parse_state, list, $1);
+       if (err)
+               YYNOMEM;
        $$ = list;
 }
 
@@ -490,14 +443,16 @@ PE_LEGACY_CACHE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
+
        err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2);
 
        parse_events_terms__delete($2);
        free($1);
        if (err) {
                free_list_evsel(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -509,14 +464,16 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
+
        err = parse_events_add_breakpoint(_parse_state, list,
                                          $2, $6, $4, $7);
        parse_events_terms__delete($7);
        free($6);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -527,13 +484,15 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
+
        err = parse_events_add_breakpoint(_parse_state, list,
                                          $2, NULL, $4, $5);
        parse_events_terms__delete($5);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -544,14 +503,16 @@ PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
+
        err = parse_events_add_breakpoint(_parse_state, list,
                                          $2, $4, 0, $5);
        parse_events_terms__delete($5);
        free($4);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -562,13 +523,14 @@ PE_PREFIX_MEM PE_VALUE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        err = parse_events_add_breakpoint(_parse_state, list,
                                          $2, NULL, 0, $3);
        parse_events_terms__delete($3);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -582,19 +544,20 @@ tracepoint_name opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        if (error)
                error->idx = @1.first_column;
 
        err = parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event,
-                                       error, $2);
+                                       error, $2, &@1);
 
        parse_events_terms__delete($2);
        free($1.sys);
        free($1.event);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -614,13 +577,14 @@ PE_VALUE ':' PE_VALUE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4,
                                       /*wildcard=*/false);
        parse_events_terms__delete($4);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -633,52 +597,20 @@ PE_RAW opt_event_config
        u64 num;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        errno = 0;
        num = strtoull($1 + 1, NULL, 16);
-       ABORT_ON(errno);
+       /* Given the lexer will only give [a-fA-F0-9]+ a failure here should be impossible. */
+       if (errno)
+               YYABORT;
        free($1);
        err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, num, $2,
                                       /*wildcard=*/false);
        parse_events_terms__delete($2);
        if (err) {
                free(list);
-               YYABORT;
-       }
-       $$ = list;
-}
-
-event_bpf_file:
-PE_BPF_OBJECT opt_event_config
-{
-       struct parse_events_state *parse_state = _parse_state;
-       struct list_head *list;
-       int err;
-
-       list = alloc_list();
-       ABORT_ON(!list);
-       err = parse_events_load_bpf(parse_state, list, $1, false, $2);
-       parse_events_terms__delete($2);
-       free($1);
-       if (err) {
-               free(list);
-               YYABORT;
-       }
-       $$ = list;
-}
-|
-PE_BPF_SOURCE opt_event_config
-{
-       struct list_head *list;
-       int err;
-
-       list = alloc_list();
-       ABORT_ON(!list);
-       err = parse_events_load_bpf(_parse_state, list, $1, true, $2);
-       parse_events_terms__delete($2);
-       if (err) {
-               free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -738,7 +670,8 @@ event_term
        struct list_head *head = malloc(sizeof(*head));
        struct parse_events_term *term = $1;
 
-       ABORT_ON(!head);
+       if (!head)
+               YYNOMEM;
        INIT_LIST_HEAD(head);
        list_add_tail(&term->list, head);
        $$ = head;
@@ -752,11 +685,12 @@ event_term:
 PE_RAW
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW,
+                                        strdup("raw"), $1, &@1, &@1);
 
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW,
-                                       strdup("raw"), $1, &@1, &@1)) {
+       if (err) {
                free($1);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -764,12 +698,12 @@ PE_RAW
 name_or_raw '=' name_or_legacy
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3, &@1, &@3);
 
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3, &@1, &@3)) {
+       if (err) {
                free($1);
                free($3);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -777,11 +711,12 @@ name_or_raw '=' name_or_legacy
 name_or_raw '=' PE_VALUE
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                        $1, $3, /*novalue=*/false, &@1, &@3);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3, false, &@1, &@3)) {
+       if (err) {
                free($1);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -789,12 +724,13 @@ name_or_raw '=' PE_VALUE
 name_or_raw '=' PE_TERM_HW
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                        $1, $3.str, &@1, &@3);
 
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3.str, &@1, &@3)) {
+       if (err) {
                free($1);
                free($3.str);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -802,11 +738,12 @@ name_or_raw '=' PE_TERM_HW
 PE_LEGACY_CACHE
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
+                                        $1, /*num=*/1, /*novalue=*/true, &@1, /*loc_val=*/NULL);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
-                                       $1, 1, true, &@1, NULL)) {
+       if (err) {
                free($1);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -814,11 +751,12 @@ PE_LEGACY_CACHE
 PE_NAME
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                        $1, /*num=*/1, /*novalue=*/true, &@1, /*loc_val=*/NULL);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, 1, true, &@1, NULL)) {
+       if (err) {
                free($1);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -826,11 +764,13 @@ PE_NAME
 PE_TERM_HW
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
+                                        $1.str, $1.num & 255, /*novalue=*/false,
+                                        &@1, /*loc_val=*/NULL);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
-                                  $1.str, $1.num & 255, false, &@1, NULL)) {
+       if (err) {
                free($1.str);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -838,10 +778,12 @@ PE_TERM_HW
 PE_TERM '=' name_or_legacy
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, (enum parse_events__term_type)$1,
+                                       /*config=*/NULL, $3, &@1, &@3);
 
-       if (parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3)) {
+       if (err) {
                free($3);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -849,10 +791,12 @@ PE_TERM '=' name_or_legacy
 PE_TERM '=' PE_TERM_HW
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, (enum parse_events__term_type)$1,
+                                        /*config=*/NULL, $3.str, &@1, &@3);
 
-       if (parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3)) {
+       if (err) {
                free($3.str);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -860,53 +804,39 @@ PE_TERM '=' PE_TERM_HW
 PE_TERM '=' PE_TERM
 {
        struct parse_events_term *term;
+       int err = parse_events_term__term(&term,
+                                         (enum parse_events__term_type)$1,
+                                         (enum parse_events__term_type)$3,
+                                         &@1, &@3);
+
+       if (err)
+               PE_ABORT(err);
 
-       ABORT_ON(parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3));
        $$ = term;
 }
 |
 PE_TERM '=' PE_VALUE
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, (enum parse_events__term_type)$1,
+                                        /*config=*/NULL, $3, /*novalue=*/false, &@1, &@3);
+
+       if (err)
+               PE_ABORT(err);
 
-       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
        $$ = term;
 }
 |
 PE_TERM
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, (enum parse_events__term_type)$1,
+                                       /*config=*/NULL, /*num=*/1, /*novalue=*/true,
+                                       &@1, /*loc_val=*/NULL);
 
-       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
-       $$ = term;
-}
-|
-name_or_raw array '=' name_or_legacy
-{
-       struct parse_events_term *term;
+       if (err)
+               PE_ABORT(err);
 
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $4, &@1, &@4)) {
-               free($1);
-               free($4);
-               free($2.ranges);
-               YYABORT;
-       }
-       term->array = $2;
-       $$ = term;
-}
-|
-name_or_raw array '=' PE_VALUE
-{
-       struct parse_events_term *term;
-
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $4, false, &@1, &@4)) {
-               free($1);
-               free($2.ranges);
-               YYABORT;
-       }
-       term->array = $2;
        $$ = term;
 }
 |
@@ -914,73 +844,19 @@ PE_DRV_CFG_TERM
 {
        struct parse_events_term *term;
        char *config = strdup($1);
+       int err;
 
-       ABORT_ON(!config);
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
-                                       config, $1, &@1, NULL)) {
+       if (!config)
+               YYNOMEM;
+       err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, config, $1, &@1, NULL);
+       if (err) {
                free($1);
                free(config);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
 
-array:
-'[' array_terms ']'
-{
-       $$ = $2;
-}
-|
-PE_ARRAY_ALL
-{
-       $$.nr_ranges = 0;
-       $$.ranges = NULL;
-}
-
-array_terms:
-array_terms ',' array_term
-{
-       struct parse_events_array new_array;
-
-       new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
-       new_array.ranges = realloc($1.ranges,
-                               sizeof(new_array.ranges[0]) *
-                               new_array.nr_ranges);
-       ABORT_ON(!new_array.ranges);
-       memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
-              $3.nr_ranges * sizeof(new_array.ranges[0]));
-       free($3.ranges);
-       $$ = new_array;
-}
-|
-array_term
-
-array_term:
-PE_VALUE
-{
-       struct parse_events_array array;
-
-       array.nr_ranges = 1;
-       array.ranges = malloc(sizeof(array.ranges[0]));
-       ABORT_ON(!array.ranges);
-       array.ranges[0].start = $1;
-       array.ranges[0].length = 1;
-       $$ = array;
-}
-|
-PE_VALUE PE_ARRAY_RANGE PE_VALUE
-{
-       struct parse_events_array array;
-
-       ABORT_ON($3 < $1);
-       array.nr_ranges = 1;
-       array.ranges = malloc(sizeof(array.ranges[0]));
-       ABORT_ON(!array.ranges);
-       array.ranges[0].start = $1;
-       array.ranges[0].length = $3 - $1 + 1;
-       $$ = array;
-}
-
 sep_dc: ':' |
 
 sep_slash_slash_dc: '/' '/' | ':' |
diff --git a/tools/perf/util/perf-regs-arch/Build b/tools/perf/util/perf-regs-arch/Build
new file mode 100644 (file)
index 0000000..d9d596d
--- /dev/null
@@ -0,0 +1,9 @@
+perf-y += perf_regs_aarch64.o
+perf-y += perf_regs_arm.o
+perf-y += perf_regs_csky.o
+perf-y += perf_regs_loongarch.o
+perf-y += perf_regs_mips.o
+perf-y += perf_regs_powerpc.o
+perf-y += perf_regs_riscv.o
+perf-y += perf_regs_s390.o
+perf-y += perf_regs_x86.o
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_aarch64.c b/tools/perf/util/perf-regs-arch/perf_regs_aarch64.c
new file mode 100644 (file)
index 0000000..696566c
--- /dev/null
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/arm64/include/uapi/asm/perf_regs.h"
+
+/*
+ * Map a PERF_REG_ARM64_* index to its textual register name.
+ *
+ * Returns NULL for any id that has no entry in the table below, which
+ * includes negative ids and ids past the last named register.
+ */
+const char *__perf_reg_name_arm64(int id)
+{
+       static const char * const reg_names[] = {
+               [PERF_REG_ARM64_X0] = "x0",
+               [PERF_REG_ARM64_X1] = "x1",
+               [PERF_REG_ARM64_X2] = "x2",
+               [PERF_REG_ARM64_X3] = "x3",
+               [PERF_REG_ARM64_X4] = "x4",
+               [PERF_REG_ARM64_X5] = "x5",
+               [PERF_REG_ARM64_X6] = "x6",
+               [PERF_REG_ARM64_X7] = "x7",
+               [PERF_REG_ARM64_X8] = "x8",
+               [PERF_REG_ARM64_X9] = "x9",
+               [PERF_REG_ARM64_X10] = "x10",
+               [PERF_REG_ARM64_X11] = "x11",
+               [PERF_REG_ARM64_X12] = "x12",
+               [PERF_REG_ARM64_X13] = "x13",
+               [PERF_REG_ARM64_X14] = "x14",
+               [PERF_REG_ARM64_X15] = "x15",
+               [PERF_REG_ARM64_X16] = "x16",
+               [PERF_REG_ARM64_X17] = "x17",
+               [PERF_REG_ARM64_X18] = "x18",
+               [PERF_REG_ARM64_X19] = "x19",
+               [PERF_REG_ARM64_X20] = "x20",
+               [PERF_REG_ARM64_X21] = "x21",
+               [PERF_REG_ARM64_X22] = "x22",
+               [PERF_REG_ARM64_X23] = "x23",
+               [PERF_REG_ARM64_X24] = "x24",
+               [PERF_REG_ARM64_X25] = "x25",
+               [PERF_REG_ARM64_X26] = "x26",
+               [PERF_REG_ARM64_X27] = "x27",
+               [PERF_REG_ARM64_X28] = "x28",
+               [PERF_REG_ARM64_X29] = "x29",
+               [PERF_REG_ARM64_SP] = "sp",
+               [PERF_REG_ARM64_LR] = "lr",
+               [PERF_REG_ARM64_PC] = "pc",
+               [PERF_REG_ARM64_VG] = "vg",
+       };
+
+       /* The unsigned cast folds the "id < 0" case into the bound check. */
+       if ((unsigned int)id >= sizeof(reg_names) / sizeof(reg_names[0]))
+               return NULL;
+
+       /* Slots without an initializer are zero-filled, i.e. NULL. */
+       return reg_names[id];
+}
+
+/* Report PERF_REG_ARM64_PC as the register index holding the instruction pointer. */
+uint64_t __perf_reg_ip_arm64(void)
+{
+       return PERF_REG_ARM64_PC;
+}
+
+/* Report PERF_REG_ARM64_SP as the register index holding the stack pointer. */
+uint64_t __perf_reg_sp_arm64(void)
+{
+       return PERF_REG_ARM64_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_arm.c b/tools/perf/util/perf-regs-arch/perf_regs_arm.c
new file mode 100644 (file)
index 0000000..700fd07
--- /dev/null
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/arm/include/uapi/asm/perf_regs.h"
+
+/*
+ * Map a PERF_REG_ARM_* index to its textual register name.
+ *
+ * Returns NULL for any id that has no entry in the table below, which
+ * includes negative ids and ids past the last named register.
+ */
+const char *__perf_reg_name_arm(int id)
+{
+       static const char * const reg_names[] = {
+               [PERF_REG_ARM_R0] = "r0",
+               [PERF_REG_ARM_R1] = "r1",
+               [PERF_REG_ARM_R2] = "r2",
+               [PERF_REG_ARM_R3] = "r3",
+               [PERF_REG_ARM_R4] = "r4",
+               [PERF_REG_ARM_R5] = "r5",
+               [PERF_REG_ARM_R6] = "r6",
+               [PERF_REG_ARM_R7] = "r7",
+               [PERF_REG_ARM_R8] = "r8",
+               [PERF_REG_ARM_R9] = "r9",
+               [PERF_REG_ARM_R10] = "r10",
+               [PERF_REG_ARM_FP] = "fp",
+               [PERF_REG_ARM_IP] = "ip",
+               [PERF_REG_ARM_SP] = "sp",
+               [PERF_REG_ARM_LR] = "lr",
+               [PERF_REG_ARM_PC] = "pc",
+       };
+
+       /* The unsigned cast folds the "id < 0" case into the bound check. */
+       if ((unsigned int)id >= sizeof(reg_names) / sizeof(reg_names[0]))
+               return NULL;
+
+       /* Slots without an initializer are zero-filled, i.e. NULL. */
+       return reg_names[id];
+}
+
+/* Report PERF_REG_ARM_PC as the register index holding the instruction pointer. */
+uint64_t __perf_reg_ip_arm(void)
+{
+       return PERF_REG_ARM_PC;
+}
+
+/* Report PERF_REG_ARM_SP as the register index holding the stack pointer. */
+uint64_t __perf_reg_sp_arm(void)
+{
+       return PERF_REG_ARM_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_csky.c b/tools/perf/util/perf-regs-arch/perf_regs_csky.c
new file mode 100644 (file)
index 0000000..a284109
--- /dev/null
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../arch/csky/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_csky(int id)
+{
+       switch (id) {
+       case PERF_REG_CSKY_A0:
+               return "a0";
+       case PERF_REG_CSKY_A1:
+               return "a1";
+       case PERF_REG_CSKY_A2:
+               return "a2";
+       case PERF_REG_CSKY_A3:
+               return "a3";
+       case PERF_REG_CSKY_REGS0:
+               return "regs0";
+       case PERF_REG_CSKY_REGS1:
+               return "regs1";
+       case PERF_REG_CSKY_REGS2:
+               return "regs2";
+       case PERF_REG_CSKY_REGS3:
+               return "regs3";
+       case PERF_REG_CSKY_REGS4:
+               return "regs4";
+       case PERF_REG_CSKY_REGS5:
+               return "regs5";
+       case PERF_REG_CSKY_REGS6:
+               return "regs6";
+       case PERF_REG_CSKY_REGS7:
+               return "regs7";
+       case PERF_REG_CSKY_REGS8:
+               return "regs8";
+       case PERF_REG_CSKY_REGS9:
+               return "regs9";
+       case PERF_REG_CSKY_SP:
+               return "sp";
+       case PERF_REG_CSKY_LR:
+               return "lr";
+       case PERF_REG_CSKY_PC:
+               return "pc";
+#if defined(__CSKYABIV2__)
+       case PERF_REG_CSKY_EXREGS0:
+               return "exregs0";
+       case PERF_REG_CSKY_EXREGS1:
+               return "exregs1";
+       case PERF_REG_CSKY_EXREGS2:
+               return "exregs2";
+       case PERF_REG_CSKY_EXREGS3:
+               return "exregs3";
+       case PERF_REG_CSKY_EXREGS4:
+               return "exregs4";
+       case PERF_REG_CSKY_EXREGS5:
+               return "exregs5";
+       case PERF_REG_CSKY_EXREGS6:
+               return "exregs6";
+       case PERF_REG_CSKY_EXREGS7:
+               return "exregs7";
+       case PERF_REG_CSKY_EXREGS8:
+               return "exregs8";
+       case PERF_REG_CSKY_EXREGS9:
+               return "exregs9";
+       case PERF_REG_CSKY_EXREGS10:
+               return "exregs10";
+       case PERF_REG_CSKY_EXREGS11:
+               return "exregs11";
+       case PERF_REG_CSKY_EXREGS12:
+               return "exregs12";
+       case PERF_REG_CSKY_EXREGS13:
+               return "exregs13";
+       case PERF_REG_CSKY_EXREGS14:
+               return "exregs14";
+       case PERF_REG_CSKY_TLS:
+               return "tls";
+       case PERF_REG_CSKY_HI:
+               return "hi";
+       case PERF_REG_CSKY_LO:
+               return "lo";
+#endif
+       default:
+               return NULL;
+       }
+
+       return NULL;
+}
+
+/* Report PERF_REG_CSKY_PC as the register index holding the instruction pointer. */
+uint64_t __perf_reg_ip_csky(void)
+{
+       return PERF_REG_CSKY_PC;
+}
+
+/* Report PERF_REG_CSKY_SP as the register index holding the stack pointer. */
+uint64_t __perf_reg_sp_csky(void)
+{
+       return PERF_REG_CSKY_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_loongarch.c b/tools/perf/util/perf-regs-arch/perf_regs_loongarch.c
new file mode 100644 (file)
index 0000000..a9ba0f9
--- /dev/null
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/loongarch/include/uapi/asm/perf_regs.h"
+
+/*
+ * Map a PERF_REG_LOONGARCH_* index to its textual register name.
+ *
+ * Returns NULL for any id that has no entry in the table below, which
+ * includes negative ids and ids past the last named register.
+ */
+const char *__perf_reg_name_loongarch(int id)
+{
+       static const char * const reg_names[] = {
+               [PERF_REG_LOONGARCH_PC] = "PC",
+               [PERF_REG_LOONGARCH_R1] = "%r1",
+               [PERF_REG_LOONGARCH_R2] = "%r2",
+               [PERF_REG_LOONGARCH_R3] = "%r3",
+               [PERF_REG_LOONGARCH_R4] = "%r4",
+               [PERF_REG_LOONGARCH_R5] = "%r5",
+               [PERF_REG_LOONGARCH_R6] = "%r6",
+               [PERF_REG_LOONGARCH_R7] = "%r7",
+               [PERF_REG_LOONGARCH_R8] = "%r8",
+               [PERF_REG_LOONGARCH_R9] = "%r9",
+               [PERF_REG_LOONGARCH_R10] = "%r10",
+               [PERF_REG_LOONGARCH_R11] = "%r11",
+               [PERF_REG_LOONGARCH_R12] = "%r12",
+               [PERF_REG_LOONGARCH_R13] = "%r13",
+               [PERF_REG_LOONGARCH_R14] = "%r14",
+               [PERF_REG_LOONGARCH_R15] = "%r15",
+               [PERF_REG_LOONGARCH_R16] = "%r16",
+               [PERF_REG_LOONGARCH_R17] = "%r17",
+               [PERF_REG_LOONGARCH_R18] = "%r18",
+               [PERF_REG_LOONGARCH_R19] = "%r19",
+               [PERF_REG_LOONGARCH_R20] = "%r20",
+               [PERF_REG_LOONGARCH_R21] = "%r21",
+               [PERF_REG_LOONGARCH_R22] = "%r22",
+               [PERF_REG_LOONGARCH_R23] = "%r23",
+               [PERF_REG_LOONGARCH_R24] = "%r24",
+               [PERF_REG_LOONGARCH_R25] = "%r25",
+               [PERF_REG_LOONGARCH_R26] = "%r26",
+               [PERF_REG_LOONGARCH_R27] = "%r27",
+               [PERF_REG_LOONGARCH_R28] = "%r28",
+               [PERF_REG_LOONGARCH_R29] = "%r29",
+               [PERF_REG_LOONGARCH_R30] = "%r30",
+               [PERF_REG_LOONGARCH_R31] = "%r31",
+       };
+
+       /* The unsigned cast folds the "id < 0" case into the bound check. */
+       if ((unsigned int)id >= sizeof(reg_names) / sizeof(reg_names[0]))
+               return NULL;
+
+       /* Slots without an initializer are zero-filled, i.e. NULL. */
+       return reg_names[id];
+}
+
+/* Report PERF_REG_LOONGARCH_PC as the register index holding the instruction pointer. */
+uint64_t __perf_reg_ip_loongarch(void)
+{
+       return PERF_REG_LOONGARCH_PC;
+}
+
+/* Report PERF_REG_LOONGARCH_R3 as the register index used for the stack pointer. */
+uint64_t __perf_reg_sp_loongarch(void)
+{
+       return PERF_REG_LOONGARCH_R3;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_mips.c b/tools/perf/util/perf-regs-arch/perf_regs_mips.c
new file mode 100644 (file)
index 0000000..5a45830
--- /dev/null
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/mips/include/uapi/asm/perf_regs.h"
+
+/*
+ * Map a PERF_REG_MIPS_* index to its textual register name.
+ *
+ * Only the registers listed in the table are named; R26 and R27 are not
+ * among them.  Returns NULL for any id that has no entry in the table,
+ * which includes negative ids and ids past the last named register.
+ */
+const char *__perf_reg_name_mips(int id)
+{
+       static const char * const reg_names[] = {
+               [PERF_REG_MIPS_PC] = "PC",
+               [PERF_REG_MIPS_R1] = "$1",
+               [PERF_REG_MIPS_R2] = "$2",
+               [PERF_REG_MIPS_R3] = "$3",
+               [PERF_REG_MIPS_R4] = "$4",
+               [PERF_REG_MIPS_R5] = "$5",
+               [PERF_REG_MIPS_R6] = "$6",
+               [PERF_REG_MIPS_R7] = "$7",
+               [PERF_REG_MIPS_R8] = "$8",
+               [PERF_REG_MIPS_R9] = "$9",
+               [PERF_REG_MIPS_R10] = "$10",
+               [PERF_REG_MIPS_R11] = "$11",
+               [PERF_REG_MIPS_R12] = "$12",
+               [PERF_REG_MIPS_R13] = "$13",
+               [PERF_REG_MIPS_R14] = "$14",
+               [PERF_REG_MIPS_R15] = "$15",
+               [PERF_REG_MIPS_R16] = "$16",
+               [PERF_REG_MIPS_R17] = "$17",
+               [PERF_REG_MIPS_R18] = "$18",
+               [PERF_REG_MIPS_R19] = "$19",
+               [PERF_REG_MIPS_R20] = "$20",
+               [PERF_REG_MIPS_R21] = "$21",
+               [PERF_REG_MIPS_R22] = "$22",
+               [PERF_REG_MIPS_R23] = "$23",
+               [PERF_REG_MIPS_R24] = "$24",
+               [PERF_REG_MIPS_R25] = "$25",
+               [PERF_REG_MIPS_R28] = "$28",
+               [PERF_REG_MIPS_R29] = "$29",
+               [PERF_REG_MIPS_R30] = "$30",
+               [PERF_REG_MIPS_R31] = "$31",
+       };
+
+       /* The unsigned cast folds the "id < 0" case into the bound check. */
+       if ((unsigned int)id >= sizeof(reg_names) / sizeof(reg_names[0]))
+               return NULL;
+
+       /* Slots without an initializer are zero-filled, i.e. NULL. */
+       return reg_names[id];
+}
+
+/* Report PERF_REG_MIPS_PC as the register index holding the instruction pointer. */
+uint64_t __perf_reg_ip_mips(void)
+{
+       return PERF_REG_MIPS_PC;
+}
+
+/* Report PERF_REG_MIPS_R29 as the register index used for the stack pointer. */
+uint64_t __perf_reg_sp_mips(void)
+{
+       return PERF_REG_MIPS_R29;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_powerpc.c b/tools/perf/util/perf-regs-arch/perf_regs_powerpc.c
new file mode 100644 (file)
index 0000000..1f0d682
--- /dev/null
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/powerpc/include/uapi/asm/perf_regs.h"
+
+/*
+ * Map a PERF_REG_POWERPC_* index to its textual register name.
+ *
+ * Returns NULL for any id that has no entry in the table below, which
+ * includes negative ids and ids past the last named register.
+ */
+const char *__perf_reg_name_powerpc(int id)
+{
+       static const char * const reg_names[] = {
+               [PERF_REG_POWERPC_R0] = "r0",
+               [PERF_REG_POWERPC_R1] = "r1",
+               [PERF_REG_POWERPC_R2] = "r2",
+               [PERF_REG_POWERPC_R3] = "r3",
+               [PERF_REG_POWERPC_R4] = "r4",
+               [PERF_REG_POWERPC_R5] = "r5",
+               [PERF_REG_POWERPC_R6] = "r6",
+               [PERF_REG_POWERPC_R7] = "r7",
+               [PERF_REG_POWERPC_R8] = "r8",
+               [PERF_REG_POWERPC_R9] = "r9",
+               [PERF_REG_POWERPC_R10] = "r10",
+               [PERF_REG_POWERPC_R11] = "r11",
+               [PERF_REG_POWERPC_R12] = "r12",
+               [PERF_REG_POWERPC_R13] = "r13",
+               [PERF_REG_POWERPC_R14] = "r14",
+               [PERF_REG_POWERPC_R15] = "r15",
+               [PERF_REG_POWERPC_R16] = "r16",
+               [PERF_REG_POWERPC_R17] = "r17",
+               [PERF_REG_POWERPC_R18] = "r18",
+               [PERF_REG_POWERPC_R19] = "r19",
+               [PERF_REG_POWERPC_R20] = "r20",
+               [PERF_REG_POWERPC_R21] = "r21",
+               [PERF_REG_POWERPC_R22] = "r22",
+               [PERF_REG_POWERPC_R23] = "r23",
+               [PERF_REG_POWERPC_R24] = "r24",
+               [PERF_REG_POWERPC_R25] = "r25",
+               [PERF_REG_POWERPC_R26] = "r26",
+               [PERF_REG_POWERPC_R27] = "r27",
+               [PERF_REG_POWERPC_R28] = "r28",
+               [PERF_REG_POWERPC_R29] = "r29",
+               [PERF_REG_POWERPC_R30] = "r30",
+               [PERF_REG_POWERPC_R31] = "r31",
+               [PERF_REG_POWERPC_NIP] = "nip",
+               [PERF_REG_POWERPC_MSR] = "msr",
+               [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+               [PERF_REG_POWERPC_CTR] = "ctr",
+               [PERF_REG_POWERPC_LINK] = "link",
+               [PERF_REG_POWERPC_XER] = "xer",
+               [PERF_REG_POWERPC_CCR] = "ccr",
+               [PERF_REG_POWERPC_SOFTE] = "softe",
+               [PERF_REG_POWERPC_TRAP] = "trap",
+               [PERF_REG_POWERPC_DAR] = "dar",
+               [PERF_REG_POWERPC_DSISR] = "dsisr",
+               [PERF_REG_POWERPC_SIER] = "sier",
+               [PERF_REG_POWERPC_MMCRA] = "mmcra",
+               [PERF_REG_POWERPC_MMCR0] = "mmcr0",
+               [PERF_REG_POWERPC_MMCR1] = "mmcr1",
+               [PERF_REG_POWERPC_MMCR2] = "mmcr2",
+               [PERF_REG_POWERPC_MMCR3] = "mmcr3",
+               [PERF_REG_POWERPC_SIER2] = "sier2",
+               [PERF_REG_POWERPC_SIER3] = "sier3",
+               [PERF_REG_POWERPC_PMC1] = "pmc1",
+               [PERF_REG_POWERPC_PMC2] = "pmc2",
+               [PERF_REG_POWERPC_PMC3] = "pmc3",
+               [PERF_REG_POWERPC_PMC4] = "pmc4",
+               [PERF_REG_POWERPC_PMC5] = "pmc5",
+               [PERF_REG_POWERPC_PMC6] = "pmc6",
+               [PERF_REG_POWERPC_SDAR] = "sdar",
+               [PERF_REG_POWERPC_SIAR] = "siar",
+       };
+
+       /* The unsigned cast folds the "id < 0" case into the bound check. */
+       if ((unsigned int)id >= sizeof(reg_names) / sizeof(reg_names[0]))
+               return NULL;
+
+       /* Slots without an initializer are zero-filled, i.e. NULL. */
+       return reg_names[id];
+}
+
+/* Report PERF_REG_POWERPC_NIP as the register index used for the instruction pointer. */
+uint64_t __perf_reg_ip_powerpc(void)
+{
+       return PERF_REG_POWERPC_NIP;
+}
+
+/* Report PERF_REG_POWERPC_R1 as the register index used for the stack pointer. */
+uint64_t __perf_reg_sp_powerpc(void)
+{
+       return PERF_REG_POWERPC_R1;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c
new file mode 100644 (file)
index 0000000..e432630
--- /dev/null
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/riscv/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_riscv(int id)
+{
+       switch (id) {
+       case PERF_REG_RISCV_PC:
+               return "pc";
+       case PERF_REG_RISCV_RA:
+               return "ra";
+       case PERF_REG_RISCV_SP:
+               return "sp";
+       case PERF_REG_RISCV_GP:
+               return "gp";
+       case PERF_REG_RISCV_TP:
+               return "tp";
+       case PERF_REG_RISCV_T0:
+               return "t0";
+       case PERF_REG_RISCV_T1:
+               return "t1";
+       case PERF_REG_RISCV_T2:
+               return "t2";
+       case PERF_REG_RISCV_S0:
+               return "s0";
+       case PERF_REG_RISCV_S1:
+               return "s1";
+       case PERF_REG_RISCV_A0:
+               return "a0";
+       case PERF_REG_RISCV_A1:
+               return "a1";
+       case PERF_REG_RISCV_A2:
+               return "a2";
+       case PERF_REG_RISCV_A3:
+               return "a3";
+       case PERF_REG_RISCV_A4:
+               return "a4";
+       case PERF_REG_RISCV_A5:
+               return "a5";
+       case PERF_REG_RISCV_A6:
+               return "a6";
+       case PERF_REG_RISCV_A7:
+               return "a7";
+       case PERF_REG_RISCV_S2:
+               return "s2";
+       case PERF_REG_RISCV_S3:
+               return "s3";
+       case PERF_REG_RISCV_S4:
+               return "s4";
+       case PERF_REG_RISCV_S5:
+               return "s5";
+       case PERF_REG_RISCV_S6:
+               return "s6";
+       case PERF_REG_RISCV_S7:
+               return "s7";
+       case PERF_REG_RISCV_S8:
+               return "s8";
+       case PERF_REG_RISCV_S9:
+               return "s9";
+       case PERF_REG_RISCV_S10:
+               return "s10";
+       case PERF_REG_RISCV_S11:
+               return "s11";
+       case PERF_REG_RISCV_T3:
+               return "t3";
+       case PERF_REG_RISCV_T4:
+               return "t4";
+       case PERF_REG_RISCV_T5:
+               return "t5";
+       case PERF_REG_RISCV_T6:
+               return "t6";
+       default:
+               return NULL;
+       }
+
+       return NULL;
+}
+
+uint64_t __perf_reg_ip_riscv(void)
+{
+       return PERF_REG_RISCV_PC;
+}
+
+uint64_t __perf_reg_sp_riscv(void)
+{
+       return PERF_REG_RISCV_SP;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_s390.c b/tools/perf/util/perf-regs-arch/perf_regs_s390.c
new file mode 100644 (file)
index 0000000..1c7a46d
--- /dev/null
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/s390/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_s390(int id)
+{
+       switch (id) {
+       case PERF_REG_S390_R0:
+               return "R0";
+       case PERF_REG_S390_R1:
+               return "R1";
+       case PERF_REG_S390_R2:
+               return "R2";
+       case PERF_REG_S390_R3:
+               return "R3";
+       case PERF_REG_S390_R4:
+               return "R4";
+       case PERF_REG_S390_R5:
+               return "R5";
+       case PERF_REG_S390_R6:
+               return "R6";
+       case PERF_REG_S390_R7:
+               return "R7";
+       case PERF_REG_S390_R8:
+               return "R8";
+       case PERF_REG_S390_R9:
+               return "R9";
+       case PERF_REG_S390_R10:
+               return "R10";
+       case PERF_REG_S390_R11:
+               return "R11";
+       case PERF_REG_S390_R12:
+               return "R12";
+       case PERF_REG_S390_R13:
+               return "R13";
+       case PERF_REG_S390_R14:
+               return "R14";
+       case PERF_REG_S390_R15:
+               return "R15";
+       case PERF_REG_S390_FP0:
+               return "FP0";
+       case PERF_REG_S390_FP1:
+               return "FP1";
+       case PERF_REG_S390_FP2:
+               return "FP2";
+       case PERF_REG_S390_FP3:
+               return "FP3";
+       case PERF_REG_S390_FP4:
+               return "FP4";
+       case PERF_REG_S390_FP5:
+               return "FP5";
+       case PERF_REG_S390_FP6:
+               return "FP6";
+       case PERF_REG_S390_FP7:
+               return "FP7";
+       case PERF_REG_S390_FP8:
+               return "FP8";
+       case PERF_REG_S390_FP9:
+               return "FP9";
+       case PERF_REG_S390_FP10:
+               return "FP10";
+       case PERF_REG_S390_FP11:
+               return "FP11";
+       case PERF_REG_S390_FP12:
+               return "FP12";
+       case PERF_REG_S390_FP13:
+               return "FP13";
+       case PERF_REG_S390_FP14:
+               return "FP14";
+       case PERF_REG_S390_FP15:
+               return "FP15";
+       case PERF_REG_S390_MASK:
+               return "MASK";
+       case PERF_REG_S390_PC:
+               return "PC";
+       default:
+               return NULL;
+       }
+
+       return NULL;
+}
+
+uint64_t __perf_reg_ip_s390(void)
+{
+       return PERF_REG_S390_PC;
+}
+
+uint64_t __perf_reg_sp_s390(void)
+{
+       return PERF_REG_S390_R15;
+}
+
+#endif
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_x86.c b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
new file mode 100644 (file)
index 0000000..873c620
--- /dev/null
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+
+#include "../perf_regs.h"
+#include "../../../arch/x86/include/uapi/asm/perf_regs.h"
+
+const char *__perf_reg_name_x86(int id)
+{
+       switch (id) {
+       case PERF_REG_X86_AX:
+               return "AX";
+       case PERF_REG_X86_BX:
+               return "BX";
+       case PERF_REG_X86_CX:
+               return "CX";
+       case PERF_REG_X86_DX:
+               return "DX";
+       case PERF_REG_X86_SI:
+               return "SI";
+       case PERF_REG_X86_DI:
+               return "DI";
+       case PERF_REG_X86_BP:
+               return "BP";
+       case PERF_REG_X86_SP:
+               return "SP";
+       case PERF_REG_X86_IP:
+               return "IP";
+       case PERF_REG_X86_FLAGS:
+               return "FLAGS";
+       case PERF_REG_X86_CS:
+               return "CS";
+       case PERF_REG_X86_SS:
+               return "SS";
+       case PERF_REG_X86_DS:
+               return "DS";
+       case PERF_REG_X86_ES:
+               return "ES";
+       case PERF_REG_X86_FS:
+               return "FS";
+       case PERF_REG_X86_GS:
+               return "GS";
+       case PERF_REG_X86_R8:
+               return "R8";
+       case PERF_REG_X86_R9:
+               return "R9";
+       case PERF_REG_X86_R10:
+               return "R10";
+       case PERF_REG_X86_R11:
+               return "R11";
+       case PERF_REG_X86_R12:
+               return "R12";
+       case PERF_REG_X86_R13:
+               return "R13";
+       case PERF_REG_X86_R14:
+               return "R14";
+       case PERF_REG_X86_R15:
+               return "R15";
+
+#define XMM(x) \
+       case PERF_REG_X86_XMM ## x:     \
+       case PERF_REG_X86_XMM ## x + 1: \
+               return "XMM" #x;
+       XMM(0)
+       XMM(1)
+       XMM(2)
+       XMM(3)
+       XMM(4)
+       XMM(5)
+       XMM(6)
+       XMM(7)
+       XMM(8)
+       XMM(9)
+       XMM(10)
+       XMM(11)
+       XMM(12)
+       XMM(13)
+       XMM(14)
+       XMM(15)
+#undef XMM
+       default:
+               return NULL;
+       }
+
+       return NULL;
+}
+
+uint64_t __perf_reg_ip_x86(void)
+{
+       return PERF_REG_X86_IP;
+}
+
+uint64_t __perf_reg_sp_x86(void)
+{
+       return PERF_REG_X86_SP;
+}
+
+#endif
index 9bdbaa3..e227585 100644 (file)
@@ -3,6 +3,7 @@
 #include <string.h>
 #include "perf_regs.h"
 #include "util/sample.h"
+#include "debug.h"
 
 int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
                                 char **new_op __maybe_unused)
@@ -12,732 +13,16 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
 
 uint64_t __weak arch__intr_reg_mask(void)
 {
-       return PERF_REGS_MASK;
+       return 0;
 }
 
 uint64_t __weak arch__user_reg_mask(void)
 {
-       return PERF_REGS_MASK;
+       return 0;
 }
 
 #ifdef HAVE_PERF_REGS_SUPPORT
 
-#define perf_event_arm_regs perf_event_arm64_regs
-#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
-#undef perf_event_arm_regs
-
-#include "../../arch/arm/include/uapi/asm/perf_regs.h"
-#include "../../arch/csky/include/uapi/asm/perf_regs.h"
-#include "../../arch/loongarch/include/uapi/asm/perf_regs.h"
-#include "../../arch/mips/include/uapi/asm/perf_regs.h"
-#include "../../arch/powerpc/include/uapi/asm/perf_regs.h"
-#include "../../arch/riscv/include/uapi/asm/perf_regs.h"
-#include "../../arch/s390/include/uapi/asm/perf_regs.h"
-#include "../../arch/x86/include/uapi/asm/perf_regs.h"
-
-static const char *__perf_reg_name_arm64(int id)
-{
-       switch (id) {
-       case PERF_REG_ARM64_X0:
-               return "x0";
-       case PERF_REG_ARM64_X1:
-               return "x1";
-       case PERF_REG_ARM64_X2:
-               return "x2";
-       case PERF_REG_ARM64_X3:
-               return "x3";
-       case PERF_REG_ARM64_X4:
-               return "x4";
-       case PERF_REG_ARM64_X5:
-               return "x5";
-       case PERF_REG_ARM64_X6:
-               return "x6";
-       case PERF_REG_ARM64_X7:
-               return "x7";
-       case PERF_REG_ARM64_X8:
-               return "x8";
-       case PERF_REG_ARM64_X9:
-               return "x9";
-       case PERF_REG_ARM64_X10:
-               return "x10";
-       case PERF_REG_ARM64_X11:
-               return "x11";
-       case PERF_REG_ARM64_X12:
-               return "x12";
-       case PERF_REG_ARM64_X13:
-               return "x13";
-       case PERF_REG_ARM64_X14:
-               return "x14";
-       case PERF_REG_ARM64_X15:
-               return "x15";
-       case PERF_REG_ARM64_X16:
-               return "x16";
-       case PERF_REG_ARM64_X17:
-               return "x17";
-       case PERF_REG_ARM64_X18:
-               return "x18";
-       case PERF_REG_ARM64_X19:
-               return "x19";
-       case PERF_REG_ARM64_X20:
-               return "x20";
-       case PERF_REG_ARM64_X21:
-               return "x21";
-       case PERF_REG_ARM64_X22:
-               return "x22";
-       case PERF_REG_ARM64_X23:
-               return "x23";
-       case PERF_REG_ARM64_X24:
-               return "x24";
-       case PERF_REG_ARM64_X25:
-               return "x25";
-       case PERF_REG_ARM64_X26:
-               return "x26";
-       case PERF_REG_ARM64_X27:
-               return "x27";
-       case PERF_REG_ARM64_X28:
-               return "x28";
-       case PERF_REG_ARM64_X29:
-               return "x29";
-       case PERF_REG_ARM64_SP:
-               return "sp";
-       case PERF_REG_ARM64_LR:
-               return "lr";
-       case PERF_REG_ARM64_PC:
-               return "pc";
-       case PERF_REG_ARM64_VG:
-               return "vg";
-       default:
-               return NULL;
-       }
-
-       return NULL;
-}
-
-static const char *__perf_reg_name_arm(int id)
-{
-       switch (id) {
-       case PERF_REG_ARM_R0:
-               return "r0";
-       case PERF_REG_ARM_R1:
-               return "r1";
-       case PERF_REG_ARM_R2:
-               return "r2";
-       case PERF_REG_ARM_R3:
-               return "r3";
-       case PERF_REG_ARM_R4:
-               return "r4";
-       case PERF_REG_ARM_R5:
-               return "r5";
-       case PERF_REG_ARM_R6:
-               return "r6";
-       case PERF_REG_ARM_R7:
-               return "r7";
-       case PERF_REG_ARM_R8:
-               return "r8";
-       case PERF_REG_ARM_R9:
-               return "r9";
-       case PERF_REG_ARM_R10:
-               return "r10";
-       case PERF_REG_ARM_FP:
-               return "fp";
-       case PERF_REG_ARM_IP:
-               return "ip";
-       case PERF_REG_ARM_SP:
-               return "sp";
-       case PERF_REG_ARM_LR:
-               return "lr";
-       case PERF_REG_ARM_PC:
-               return "pc";
-       default:
-               return NULL;
-       }
-
-       return NULL;
-}
-
-static const char *__perf_reg_name_csky(int id)
-{
-       switch (id) {
-       case PERF_REG_CSKY_A0:
-               return "a0";
-       case PERF_REG_CSKY_A1:
-               return "a1";
-       case PERF_REG_CSKY_A2:
-               return "a2";
-       case PERF_REG_CSKY_A3:
-               return "a3";
-       case PERF_REG_CSKY_REGS0:
-               return "regs0";
-       case PERF_REG_CSKY_REGS1:
-               return "regs1";
-       case PERF_REG_CSKY_REGS2:
-               return "regs2";
-       case PERF_REG_CSKY_REGS3:
-               return "regs3";
-       case PERF_REG_CSKY_REGS4:
-               return "regs4";
-       case PERF_REG_CSKY_REGS5:
-               return "regs5";
-       case PERF_REG_CSKY_REGS6:
-               return "regs6";
-       case PERF_REG_CSKY_REGS7:
-               return "regs7";
-       case PERF_REG_CSKY_REGS8:
-               return "regs8";
-       case PERF_REG_CSKY_REGS9:
-               return "regs9";
-       case PERF_REG_CSKY_SP:
-               return "sp";
-       case PERF_REG_CSKY_LR:
-               return "lr";
-       case PERF_REG_CSKY_PC:
-               return "pc";
-#if defined(__CSKYABIV2__)
-       case PERF_REG_CSKY_EXREGS0:
-               return "exregs0";
-       case PERF_REG_CSKY_EXREGS1:
-               return "exregs1";
-       case PERF_REG_CSKY_EXREGS2:
-               return "exregs2";
-       case PERF_REG_CSKY_EXREGS3:
-               return "exregs3";
-       case PERF_REG_CSKY_EXREGS4:
-               return "exregs4";
-       case PERF_REG_CSKY_EXREGS5:
-               return "exregs5";
-       case PERF_REG_CSKY_EXREGS6:
-               return "exregs6";
-       case PERF_REG_CSKY_EXREGS7:
-               return "exregs7";
-       case PERF_REG_CSKY_EXREGS8:
-               return "exregs8";
-       case PERF_REG_CSKY_EXREGS9:
-               return "exregs9";
-       case PERF_REG_CSKY_EXREGS10:
-               return "exregs10";
-       case PERF_REG_CSKY_EXREGS11:
-               return "exregs11";
-       case PERF_REG_CSKY_EXREGS12:
-               return "exregs12";
-       case PERF_REG_CSKY_EXREGS13:
-               return "exregs13";
-       case PERF_REG_CSKY_EXREGS14:
-               return "exregs14";
-       case PERF_REG_CSKY_TLS:
-               return "tls";
-       case PERF_REG_CSKY_HI:
-               return "hi";
-       case PERF_REG_CSKY_LO:
-               return "lo";
-#endif
-       default:
-               return NULL;
-       }
-
-       return NULL;
-}
-
-static inline const char *__perf_reg_name_loongarch(int id)
-{
-       switch (id) {
-       case PERF_REG_LOONGARCH_PC:
-               return "PC";
-       case PERF_REG_LOONGARCH_R1:
-               return "%r1";
-       case PERF_REG_LOONGARCH_R2:
-               return "%r2";
-       case PERF_REG_LOONGARCH_R3:
-               return "%r3";
-       case PERF_REG_LOONGARCH_R4:
-               return "%r4";
-       case PERF_REG_LOONGARCH_R5:
-               return "%r5";
-       case PERF_REG_LOONGARCH_R6:
-               return "%r6";
-       case PERF_REG_LOONGARCH_R7:
-               return "%r7";
-       case PERF_REG_LOONGARCH_R8:
-               return "%r8";
-       case PERF_REG_LOONGARCH_R9:
-               return "%r9";
-       case PERF_REG_LOONGARCH_R10:
-               return "%r10";
-       case PERF_REG_LOONGARCH_R11:
-               return "%r11";
-       case PERF_REG_LOONGARCH_R12:
-               return "%r12";
-       case PERF_REG_LOONGARCH_R13:
-               return "%r13";
-       case PERF_REG_LOONGARCH_R14:
-               return "%r14";
-       case PERF_REG_LOONGARCH_R15:
-               return "%r15";
-       case PERF_REG_LOONGARCH_R16:
-               return "%r16";
-       case PERF_REG_LOONGARCH_R17:
-               return "%r17";
-       case PERF_REG_LOONGARCH_R18:
-               return "%r18";
-       case PERF_REG_LOONGARCH_R19:
-               return "%r19";
-       case PERF_REG_LOONGARCH_R20:
-               return "%r20";
-       case PERF_REG_LOONGARCH_R21:
-               return "%r21";
-       case PERF_REG_LOONGARCH_R22:
-               return "%r22";
-       case PERF_REG_LOONGARCH_R23:
-               return "%r23";
-       case PERF_REG_LOONGARCH_R24:
-               return "%r24";
-       case PERF_REG_LOONGARCH_R25:
-               return "%r25";
-       case PERF_REG_LOONGARCH_R26:
-               return "%r26";
-       case PERF_REG_LOONGARCH_R27:
-               return "%r27";
-       case PERF_REG_LOONGARCH_R28:
-               return "%r28";
-       case PERF_REG_LOONGARCH_R29:
-               return "%r29";
-       case PERF_REG_LOONGARCH_R30:
-               return "%r30";
-       case PERF_REG_LOONGARCH_R31:
-               return "%r31";
-       default:
-               break;
-       }
-       return NULL;
-}
-
-static const char *__perf_reg_name_mips(int id)
-{
-       switch (id) {
-       case PERF_REG_MIPS_PC:
-               return "PC";
-       case PERF_REG_MIPS_R1:
-               return "$1";
-       case PERF_REG_MIPS_R2:
-               return "$2";
-       case PERF_REG_MIPS_R3:
-               return "$3";
-       case PERF_REG_MIPS_R4:
-               return "$4";
-       case PERF_REG_MIPS_R5:
-               return "$5";
-       case PERF_REG_MIPS_R6:
-               return "$6";
-       case PERF_REG_MIPS_R7:
-               return "$7";
-       case PERF_REG_MIPS_R8:
-               return "$8";
-       case PERF_REG_MIPS_R9:
-               return "$9";
-       case PERF_REG_MIPS_R10:
-               return "$10";
-       case PERF_REG_MIPS_R11:
-               return "$11";
-       case PERF_REG_MIPS_R12:
-               return "$12";
-       case PERF_REG_MIPS_R13:
-               return "$13";
-       case PERF_REG_MIPS_R14:
-               return "$14";
-       case PERF_REG_MIPS_R15:
-               return "$15";
-       case PERF_REG_MIPS_R16:
-               return "$16";
-       case PERF_REG_MIPS_R17:
-               return "$17";
-       case PERF_REG_MIPS_R18:
-               return "$18";
-       case PERF_REG_MIPS_R19:
-               return "$19";
-       case PERF_REG_MIPS_R20:
-               return "$20";
-       case PERF_REG_MIPS_R21:
-               return "$21";
-       case PERF_REG_MIPS_R22:
-               return "$22";
-       case PERF_REG_MIPS_R23:
-               return "$23";
-       case PERF_REG_MIPS_R24:
-               return "$24";
-       case PERF_REG_MIPS_R25:
-               return "$25";
-       case PERF_REG_MIPS_R28:
-               return "$28";
-       case PERF_REG_MIPS_R29:
-               return "$29";
-       case PERF_REG_MIPS_R30:
-               return "$30";
-       case PERF_REG_MIPS_R31:
-               return "$31";
-       default:
-               break;
-       }
-       return NULL;
-}
-
-static const char *__perf_reg_name_powerpc(int id)
-{
-       switch (id) {
-       case PERF_REG_POWERPC_R0:
-               return "r0";
-       case PERF_REG_POWERPC_R1:
-               return "r1";
-       case PERF_REG_POWERPC_R2:
-               return "r2";
-       case PERF_REG_POWERPC_R3:
-               return "r3";
-       case PERF_REG_POWERPC_R4:
-               return "r4";
-       case PERF_REG_POWERPC_R5:
-               return "r5";
-       case PERF_REG_POWERPC_R6:
-               return "r6";
-       case PERF_REG_POWERPC_R7:
-               return "r7";
-       case PERF_REG_POWERPC_R8:
-               return "r8";
-       case PERF_REG_POWERPC_R9:
-               return "r9";
-       case PERF_REG_POWERPC_R10:
-               return "r10";
-       case PERF_REG_POWERPC_R11:
-               return "r11";
-       case PERF_REG_POWERPC_R12:
-               return "r12";
-       case PERF_REG_POWERPC_R13:
-               return "r13";
-       case PERF_REG_POWERPC_R14:
-               return "r14";
-       case PERF_REG_POWERPC_R15:
-               return "r15";
-       case PERF_REG_POWERPC_R16:
-               return "r16";
-       case PERF_REG_POWERPC_R17:
-               return "r17";
-       case PERF_REG_POWERPC_R18:
-               return "r18";
-       case PERF_REG_POWERPC_R19:
-               return "r19";
-       case PERF_REG_POWERPC_R20:
-               return "r20";
-       case PERF_REG_POWERPC_R21:
-               return "r21";
-       case PERF_REG_POWERPC_R22:
-               return "r22";
-       case PERF_REG_POWERPC_R23:
-               return "r23";
-       case PERF_REG_POWERPC_R24:
-               return "r24";
-       case PERF_REG_POWERPC_R25:
-               return "r25";
-       case PERF_REG_POWERPC_R26:
-               return "r26";
-       case PERF_REG_POWERPC_R27:
-               return "r27";
-       case PERF_REG_POWERPC_R28:
-               return "r28";
-       case PERF_REG_POWERPC_R29:
-               return "r29";
-       case PERF_REG_POWERPC_R30:
-               return "r30";
-       case PERF_REG_POWERPC_R31:
-               return "r31";
-       case PERF_REG_POWERPC_NIP:
-               return "nip";
-       case PERF_REG_POWERPC_MSR:
-               return "msr";
-       case PERF_REG_POWERPC_ORIG_R3:
-               return "orig_r3";
-       case PERF_REG_POWERPC_CTR:
-               return "ctr";
-       case PERF_REG_POWERPC_LINK:
-               return "link";
-       case PERF_REG_POWERPC_XER:
-               return "xer";
-       case PERF_REG_POWERPC_CCR:
-               return "ccr";
-       case PERF_REG_POWERPC_SOFTE:
-               return "softe";
-       case PERF_REG_POWERPC_TRAP:
-               return "trap";
-       case PERF_REG_POWERPC_DAR:
-               return "dar";
-       case PERF_REG_POWERPC_DSISR:
-               return "dsisr";
-       case PERF_REG_POWERPC_SIER:
-               return "sier";
-       case PERF_REG_POWERPC_MMCRA:
-               return "mmcra";
-       case PERF_REG_POWERPC_MMCR0:
-               return "mmcr0";
-       case PERF_REG_POWERPC_MMCR1:
-               return "mmcr1";
-       case PERF_REG_POWERPC_MMCR2:
-               return "mmcr2";
-       case PERF_REG_POWERPC_MMCR3:
-               return "mmcr3";
-       case PERF_REG_POWERPC_SIER2:
-               return "sier2";
-       case PERF_REG_POWERPC_SIER3:
-               return "sier3";
-       case PERF_REG_POWERPC_PMC1:
-               return "pmc1";
-       case PERF_REG_POWERPC_PMC2:
-               return "pmc2";
-       case PERF_REG_POWERPC_PMC3:
-               return "pmc3";
-       case PERF_REG_POWERPC_PMC4:
-               return "pmc4";
-       case PERF_REG_POWERPC_PMC5:
-               return "pmc5";
-       case PERF_REG_POWERPC_PMC6:
-               return "pmc6";
-       case PERF_REG_POWERPC_SDAR:
-               return "sdar";
-       case PERF_REG_POWERPC_SIAR:
-               return "siar";
-       default:
-               break;
-       }
-       return NULL;
-}
-
-static const char *__perf_reg_name_riscv(int id)
-{
-       switch (id) {
-       case PERF_REG_RISCV_PC:
-               return "pc";
-       case PERF_REG_RISCV_RA:
-               return "ra";
-       case PERF_REG_RISCV_SP:
-               return "sp";
-       case PERF_REG_RISCV_GP:
-               return "gp";
-       case PERF_REG_RISCV_TP:
-               return "tp";
-       case PERF_REG_RISCV_T0:
-               return "t0";
-       case PERF_REG_RISCV_T1:
-               return "t1";
-       case PERF_REG_RISCV_T2:
-               return "t2";
-       case PERF_REG_RISCV_S0:
-               return "s0";
-       case PERF_REG_RISCV_S1:
-               return "s1";
-       case PERF_REG_RISCV_A0:
-               return "a0";
-       case PERF_REG_RISCV_A1:
-               return "a1";
-       case PERF_REG_RISCV_A2:
-               return "a2";
-       case PERF_REG_RISCV_A3:
-               return "a3";
-       case PERF_REG_RISCV_A4:
-               return "a4";
-       case PERF_REG_RISCV_A5:
-               return "a5";
-       case PERF_REG_RISCV_A6:
-               return "a6";
-       case PERF_REG_RISCV_A7:
-               return "a7";
-       case PERF_REG_RISCV_S2:
-               return "s2";
-       case PERF_REG_RISCV_S3:
-               return "s3";
-       case PERF_REG_RISCV_S4:
-               return "s4";
-       case PERF_REG_RISCV_S5:
-               return "s5";
-       case PERF_REG_RISCV_S6:
-               return "s6";
-       case PERF_REG_RISCV_S7:
-               return "s7";
-       case PERF_REG_RISCV_S8:
-               return "s8";
-       case PERF_REG_RISCV_S9:
-               return "s9";
-       case PERF_REG_RISCV_S10:
-               return "s10";
-       case PERF_REG_RISCV_S11:
-               return "s11";
-       case PERF_REG_RISCV_T3:
-               return "t3";
-       case PERF_REG_RISCV_T4:
-               return "t4";
-       case PERF_REG_RISCV_T5:
-               return "t5";
-       case PERF_REG_RISCV_T6:
-               return "t6";
-       default:
-               return NULL;
-       }
-
-       return NULL;
-}
-
-static const char *__perf_reg_name_s390(int id)
-{
-       switch (id) {
-       case PERF_REG_S390_R0:
-               return "R0";
-       case PERF_REG_S390_R1:
-               return "R1";
-       case PERF_REG_S390_R2:
-               return "R2";
-       case PERF_REG_S390_R3:
-               return "R3";
-       case PERF_REG_S390_R4:
-               return "R4";
-       case PERF_REG_S390_R5:
-               return "R5";
-       case PERF_REG_S390_R6:
-               return "R6";
-       case PERF_REG_S390_R7:
-               return "R7";
-       case PERF_REG_S390_R8:
-               return "R8";
-       case PERF_REG_S390_R9:
-               return "R9";
-       case PERF_REG_S390_R10:
-               return "R10";
-       case PERF_REG_S390_R11:
-               return "R11";
-       case PERF_REG_S390_R12:
-               return "R12";
-       case PERF_REG_S390_R13:
-               return "R13";
-       case PERF_REG_S390_R14:
-               return "R14";
-       case PERF_REG_S390_R15:
-               return "R15";
-       case PERF_REG_S390_FP0:
-               return "FP0";
-       case PERF_REG_S390_FP1:
-               return "FP1";
-       case PERF_REG_S390_FP2:
-               return "FP2";
-       case PERF_REG_S390_FP3:
-               return "FP3";
-       case PERF_REG_S390_FP4:
-               return "FP4";
-       case PERF_REG_S390_FP5:
-               return "FP5";
-       case PERF_REG_S390_FP6:
-               return "FP6";
-       case PERF_REG_S390_FP7:
-               return "FP7";
-       case PERF_REG_S390_FP8:
-               return "FP8";
-       case PERF_REG_S390_FP9:
-               return "FP9";
-       case PERF_REG_S390_FP10:
-               return "FP10";
-       case PERF_REG_S390_FP11:
-               return "FP11";
-       case PERF_REG_S390_FP12:
-               return "FP12";
-       case PERF_REG_S390_FP13:
-               return "FP13";
-       case PERF_REG_S390_FP14:
-               return "FP14";
-       case PERF_REG_S390_FP15:
-               return "FP15";
-       case PERF_REG_S390_MASK:
-               return "MASK";
-       case PERF_REG_S390_PC:
-               return "PC";
-       default:
-               return NULL;
-       }
-
-       return NULL;
-}
-
-static const char *__perf_reg_name_x86(int id)
-{
-       switch (id) {
-       case PERF_REG_X86_AX:
-               return "AX";
-       case PERF_REG_X86_BX:
-               return "BX";
-       case PERF_REG_X86_CX:
-               return "CX";
-       case PERF_REG_X86_DX:
-               return "DX";
-       case PERF_REG_X86_SI:
-               return "SI";
-       case PERF_REG_X86_DI:
-               return "DI";
-       case PERF_REG_X86_BP:
-               return "BP";
-       case PERF_REG_X86_SP:
-               return "SP";
-       case PERF_REG_X86_IP:
-               return "IP";
-       case PERF_REG_X86_FLAGS:
-               return "FLAGS";
-       case PERF_REG_X86_CS:
-               return "CS";
-       case PERF_REG_X86_SS:
-               return "SS";
-       case PERF_REG_X86_DS:
-               return "DS";
-       case PERF_REG_X86_ES:
-               return "ES";
-       case PERF_REG_X86_FS:
-               return "FS";
-       case PERF_REG_X86_GS:
-               return "GS";
-       case PERF_REG_X86_R8:
-               return "R8";
-       case PERF_REG_X86_R9:
-               return "R9";
-       case PERF_REG_X86_R10:
-               return "R10";
-       case PERF_REG_X86_R11:
-               return "R11";
-       case PERF_REG_X86_R12:
-               return "R12";
-       case PERF_REG_X86_R13:
-               return "R13";
-       case PERF_REG_X86_R14:
-               return "R14";
-       case PERF_REG_X86_R15:
-               return "R15";
-
-#define XMM(x) \
-       case PERF_REG_X86_XMM ## x:     \
-       case PERF_REG_X86_XMM ## x + 1: \
-               return "XMM" #x;
-       XMM(0)
-       XMM(1)
-       XMM(2)
-       XMM(3)
-       XMM(4)
-       XMM(5)
-       XMM(6)
-       XMM(7)
-       XMM(8)
-       XMM(9)
-       XMM(10)
-       XMM(11)
-       XMM(12)
-       XMM(13)
-       XMM(14)
-       XMM(15)
-#undef XMM
-       default:
-               return NULL;
-       }
-
-       return NULL;
-}
-
 const char *perf_reg_name(int id, const char *arch)
 {
        const char *reg_name = NULL;
@@ -790,4 +75,55 @@ out:
        *valp = regs->cache_regs[id];
        return 0;
 }
+
+uint64_t perf_arch_reg_ip(const char *arch)
+{
+       if (!strcmp(arch, "arm"))
+               return __perf_reg_ip_arm();
+       else if (!strcmp(arch, "arm64"))
+               return __perf_reg_ip_arm64();
+       else if (!strcmp(arch, "csky"))
+               return __perf_reg_ip_csky();
+       else if (!strcmp(arch, "loongarch"))
+               return __perf_reg_ip_loongarch();
+       else if (!strcmp(arch, "mips"))
+               return __perf_reg_ip_mips();
+       else if (!strcmp(arch, "powerpc"))
+               return __perf_reg_ip_powerpc();
+       else if (!strcmp(arch, "riscv"))
+               return __perf_reg_ip_riscv();
+       else if (!strcmp(arch, "s390"))
+               return __perf_reg_ip_s390();
+       else if (!strcmp(arch, "x86"))
+               return __perf_reg_ip_x86();
+
+       pr_err("Fail to find IP register for arch %s, returns 0\n", arch);
+       return 0;
+}
+
+uint64_t perf_arch_reg_sp(const char *arch)
+{
+       if (!strcmp(arch, "arm"))
+               return __perf_reg_sp_arm();
+       else if (!strcmp(arch, "arm64"))
+               return __perf_reg_sp_arm64();
+       else if (!strcmp(arch, "csky"))
+               return __perf_reg_sp_csky();
+       else if (!strcmp(arch, "loongarch"))
+               return __perf_reg_sp_loongarch();
+       else if (!strcmp(arch, "mips"))
+               return __perf_reg_sp_mips();
+       else if (!strcmp(arch, "powerpc"))
+               return __perf_reg_sp_powerpc();
+       else if (!strcmp(arch, "riscv"))
+               return __perf_reg_sp_riscv();
+       else if (!strcmp(arch, "s390"))
+               return __perf_reg_sp_s390();
+       else if (!strcmp(arch, "x86"))
+               return __perf_reg_sp_x86();
+
+       pr_err("Fail to find SP register for arch %s, returns 0\n", arch);
+       return 0;
+}
+
 #endif
index ce1127a..ecd2a53 100644 (file)
@@ -30,18 +30,49 @@ uint64_t arch__user_reg_mask(void);
 #ifdef HAVE_PERF_REGS_SUPPORT
 extern const struct sample_reg sample_reg_masks[];
 
-#include <perf_regs.h>
-
-#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
-
 const char *perf_reg_name(int id, const char *arch);
 int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
+uint64_t perf_arch_reg_ip(const char *arch);
+uint64_t perf_arch_reg_sp(const char *arch);
+const char *__perf_reg_name_arm64(int id);
+uint64_t __perf_reg_ip_arm64(void);
+uint64_t __perf_reg_sp_arm64(void);
+const char *__perf_reg_name_arm(int id);
+uint64_t __perf_reg_ip_arm(void);
+uint64_t __perf_reg_sp_arm(void);
+const char *__perf_reg_name_csky(int id);
+uint64_t __perf_reg_ip_csky(void);
+uint64_t __perf_reg_sp_csky(void);
+const char *__perf_reg_name_loongarch(int id);
+uint64_t __perf_reg_ip_loongarch(void);
+uint64_t __perf_reg_sp_loongarch(void);
+const char *__perf_reg_name_mips(int id);
+uint64_t __perf_reg_ip_mips(void);
+uint64_t __perf_reg_sp_mips(void);
+const char *__perf_reg_name_powerpc(int id);
+uint64_t __perf_reg_ip_powerpc(void);
+uint64_t __perf_reg_sp_powerpc(void);
+const char *__perf_reg_name_riscv(int id);
+uint64_t __perf_reg_ip_riscv(void);
+uint64_t __perf_reg_sp_riscv(void);
+const char *__perf_reg_name_s390(int id);
+uint64_t __perf_reg_ip_s390(void);
+uint64_t __perf_reg_sp_s390(void);
+const char *__perf_reg_name_x86(int id);
+uint64_t __perf_reg_ip_x86(void);
+uint64_t __perf_reg_sp_x86(void);
+
+static inline uint64_t DWARF_MINIMAL_REGS(const char *arch)
+{
+       return (1ULL << perf_arch_reg_ip(arch)) | (1ULL << perf_arch_reg_sp(arch));
+}
 
 #else
-#define PERF_REGS_MASK 0
-#define PERF_REGS_MAX  0
 
-#define DWARF_MINIMAL_REGS PERF_REGS_MASK
+static inline uint64_t DWARF_MINIMAL_REGS(const char *arch __maybe_unused)
+{
+       return 0;
+}
 
 static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused)
 {
@@ -54,5 +85,16 @@ static inline int perf_reg_value(u64 *valp __maybe_unused,
 {
        return 0;
 }
+
+static inline uint64_t perf_arch_reg_ip(const char *arch __maybe_unused)
+{
+       return 0;
+}
+
+static inline uint64_t perf_arch_reg_sp(const char *arch __maybe_unused)
+{
+       return 0;
+}
+
 #endif /* HAVE_PERF_REGS_SUPPORT */
 #endif /* __PERF_REGS_H */
index 28380e7..d85602a 100644 (file)
@@ -19,8 +19,8 @@
 #include "evsel.h"
 #include "pmu.h"
 #include "pmus.h"
-#include "pmu-bison.h"
-#include "pmu-flex.h"
+#include <util/pmu-bison.h>
+#include <util/pmu-flex.h>
 #include "parse-events.h"
 #include "print-events.h"
 #include "header.h"
 #include "fncache.h"
 #include "util/evsel_config.h"
 
-struct perf_pmu perf_pmu__fake;
+struct perf_pmu perf_pmu__fake = {
+       .name = "fake",
+};
+
+#define UNIT_MAX_LEN   31 /* max length for event unit name */
+
+/**
+ * struct perf_pmu_alias - An event either read from sysfs or builtin in
+ * pmu-events.c, created by parsing the pmu-events json files.
+ */
+struct perf_pmu_alias {
+       /** @name: Name of the event like "mem-loads". */
+       char *name;
+       /** @desc: Optional short description of the event. */
+       char *desc;
+       /** @long_desc: Optional long description. */
+       char *long_desc;
+       /**
+        * @topic: Optional topic such as cache or pipeline, particularly for
+        * json events.
+        */
+       char *topic;
+       /** @terms: Owned list of the original parsed parameters. */
+       struct list_head terms;
+       /** @list: List element of struct perf_pmu aliases. */
+       struct list_head list;
+       /**
+        * @pmu_name: The name copied from the json struct pmu_event. This can
+        * differ from the PMU name as it won't have suffixes.
+        */
+       char *pmu_name;
+       /** @unit: Units for the event, such as bytes or cache lines. */
+       char unit[UNIT_MAX_LEN+1];
+       /** @scale: Value to scale read counter values by. */
+       double scale;
+       /**
+        * @per_pkg: Does the file
+        * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or
+        * equivalent json value exist and have the value 1.
+        */
+       bool per_pkg;
+       /**
+        * @snapshot: Does the file
+        * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.snapshot
+        * exist and have the value 1.
+        */
+       bool snapshot;
+       /**
+        * @deprecated: Is the event hidden and so not shown in perf list by
+        * default.
+        */
+       bool deprecated;
+       /** @from_sysfs: Was the alias from sysfs or a json event? */
+       bool from_sysfs;
+       /** @info_loaded: Have the scale, unit and other values been read from disk? */
+       bool info_loaded;
+};
 
 /**
  * struct perf_pmu_format - Values from a format file read from
@@ -40,6 +96,10 @@ struct perf_pmu perf_pmu__fake;
  * value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set.
  */
 struct perf_pmu_format {
+       /** @list: Element on list within struct perf_pmu. */
+       struct list_head list;
+       /** @bits: Which config bits are set by this format value. */
+       DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
        /** @name: The modifier/file name. */
        char *name;
        /**
@@ -47,18 +107,81 @@ struct perf_pmu_format {
         * are from PERF_PMU_FORMAT_VALUE_CONFIG to
         * PERF_PMU_FORMAT_VALUE_CONFIG_END.
         */
-       int value;
-       /** @bits: Which config bits are set by this format value. */
-       DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
-       /** @list: Element on list within struct perf_pmu. */
-       struct list_head list;
+       u16 value;
+       /** @loaded: Has the contents been loaded/parsed. */
+       bool loaded;
 };
 
+static int pmu_aliases_parse(struct perf_pmu *pmu);
+
+static struct perf_pmu_format *perf_pmu__new_format(struct list_head *list, char *name)
+{
+       struct perf_pmu_format *format;
+
+       format = zalloc(sizeof(*format));
+       if (!format)
+               return NULL;
+
+       format->name = strdup(name);
+       if (!format->name) {
+               free(format);
+               return NULL;
+       }
+       list_add_tail(&format->list, list);
+       return format;
+}
+
+/* Called at the end of parsing a format. */
+void perf_pmu_format__set_value(void *vformat, int config, unsigned long *bits)
+{
+       struct perf_pmu_format *format = vformat;
+
+       format->value = config;
+       memcpy(format->bits, bits, sizeof(format->bits));
+}
+
+static void __perf_pmu_format__load(struct perf_pmu_format *format, FILE *file)
+{
+       void *scanner;
+       int ret;
+
+       ret = perf_pmu_lex_init(&scanner);
+       if (ret)
+               return;
+
+       perf_pmu_set_in(file, scanner);
+       ret = perf_pmu_parse(format, scanner);
+       perf_pmu_lex_destroy(scanner);
+       format->loaded = true;
+}
+
+static void perf_pmu_format__load(struct perf_pmu *pmu, struct perf_pmu_format *format)
+{
+       char path[PATH_MAX];
+       FILE *file = NULL;
+
+       if (format->loaded)
+               return;
+
+       if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, "format"))
+               return;
+
+       assert(strlen(path) + strlen(format->name) + 2 < sizeof(path));
+       strcat(path, "/");
+       strcat(path, format->name);
+
+       file = fopen(path, "r");
+       if (!file)
+               return;
+       __perf_pmu_format__load(format, file);
+       fclose(file);
+}
+
 /*
  * Parse & process all the sysfs attributes located under
  * the directory specified in 'dir' parameter.
  */
-int perf_pmu__format_parse(int dirfd, struct list_head *head)
+int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load)
 {
        struct dirent *evt_ent;
        DIR *format_dir;
@@ -68,37 +191,35 @@ int perf_pmu__format_parse(int dirfd, struct list_head *head)
        if (!format_dir)
                return -EINVAL;
 
-       while (!ret && (evt_ent = readdir(format_dir))) {
+       while ((evt_ent = readdir(format_dir)) != NULL) {
+               struct perf_pmu_format *format;
                char *name = evt_ent->d_name;
-               int fd;
-               void *scanner;
-               FILE *file;
 
                if (!strcmp(name, ".") || !strcmp(name, ".."))
                        continue;
 
-
-               ret = -EINVAL;
-               fd = openat(dirfd, name, O_RDONLY);
-               if (fd < 0)
-                       break;
-
-               file = fdopen(fd, "r");
-               if (!file) {
-                       close(fd);
+               format = perf_pmu__new_format(&pmu->format, name);
+               if (!format) {
+                       ret = -ENOMEM;
                        break;
                }
 
-               ret = perf_pmu_lex_init(&scanner);
-               if (ret) {
+               if (eager_load) {
+                       FILE *file;
+                       int fd = openat(dirfd, name, O_RDONLY);
+
+                       if (fd < 0) {
+                               ret = -errno;
+                               break;
+                       }
+                       file = fdopen(fd, "r");
+                       if (!file) {
+                               close(fd);
+                               break;
+                       }
+                       __perf_pmu_format__load(format, file);
                        fclose(file);
-                       break;
                }
-
-               perf_pmu_set_in(file, scanner);
-               ret = perf_pmu_parse(head, name, scanner);
-               perf_pmu_lex_destroy(scanner);
-               fclose(file);
        }
 
        closedir(format_dir);
@@ -110,7 +231,7 @@ int perf_pmu__format_parse(int dirfd, struct list_head *head)
  * located at:
  * /sys/bus/event_source/devices/<dev>/format as sysfs group attributes.
  */
-static int pmu_format(int dirfd, const char *name, struct list_head *format)
+static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name)
 {
        int fd;
 
@@ -119,7 +240,7 @@ static int pmu_format(int dirfd, const char *name, struct list_head *format)
                return 0;
 
        /* it'll close the fd */
-       if (perf_pmu__format_parse(fd, format))
+       if (perf_pmu__format_parse(pmu, fd, /*eager_load=*/false))
                return -1;
 
        return 0;
@@ -162,17 +283,21 @@ out:
        return ret;
 }
 
-static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, int dirfd, char *name)
+static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
 {
        struct stat st;
        ssize_t sret;
+       size_t len;
        char scale[128];
        int fd, ret = -1;
        char path[PATH_MAX];
 
-       scnprintf(path, PATH_MAX, "%s.scale", name);
+       len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+       if (!len)
+               return 0;
+       scnprintf(path + len, sizeof(path) - len, "%s/%s.scale", pmu->name, alias->name);
 
-       fd = openat(dirfd, path, O_RDONLY);
+       fd = open(path, O_RDONLY);
        if (fd == -1)
                return -1;
 
@@ -194,15 +319,20 @@ error:
        return ret;
 }
 
-static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, int dirfd, char *name)
+static int perf_pmu__parse_unit(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
 {
        char path[PATH_MAX];
+       size_t len;
        ssize_t sret;
        int fd;
 
-       scnprintf(path, PATH_MAX, "%s.unit", name);
 
-       fd = openat(dirfd, path, O_RDONLY);
+       len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+       if (!len)
+               return 0;
+       scnprintf(path + len, sizeof(path) - len, "%s/%s.unit", pmu->name, alias->name);
+
+       fd = open(path, O_RDONLY);
        if (fd == -1)
                return -1;
 
@@ -225,14 +355,18 @@ error:
 }
 
 static int
-perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, int dirfd, char *name)
+perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
 {
        char path[PATH_MAX];
+       size_t len;
        int fd;
 
-       scnprintf(path, PATH_MAX, "%s.per-pkg", name);
+       len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+       if (!len)
+               return 0;
+       scnprintf(path + len, sizeof(path) - len, "%s/%s.per-pkg", pmu->name, alias->name);
 
-       fd = openat(dirfd, path, O_RDONLY);
+       fd = open(path, O_RDONLY);
        if (fd == -1)
                return -1;
 
@@ -242,15 +376,18 @@ perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, int dirfd, char *name)
        return 0;
 }
 
-static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
-                                   int dirfd, char *name)
+static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
 {
        char path[PATH_MAX];
+       size_t len;
        int fd;
 
-       scnprintf(path, PATH_MAX, "%s.snapshot", name);
+       len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+       if (!len)
+               return 0;
+       scnprintf(path + len, sizeof(path) - len, "%s/%s.snapshot", pmu->name, alias->name);
 
-       fd = openat(dirfd, path, O_RDONLY);
+       fd = open(path, O_RDONLY);
        if (fd == -1)
                return -1;
 
@@ -259,46 +396,13 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
        return 0;
 }
 
-static void perf_pmu_assign_str(char *name, const char *field, char **old_str,
-                               char **new_str)
-{
-       if (!*old_str)
-               goto set_new;
-
-       if (*new_str) { /* Have new string, check with old */
-               if (strcasecmp(*old_str, *new_str))
-                       pr_debug("alias %s differs in field '%s'\n",
-                                name, field);
-               zfree(old_str);
-       } else          /* Nothing new --> keep old string */
-               return;
-set_new:
-       *old_str = *new_str;
-       *new_str = NULL;
-}
-
-static void perf_pmu_update_alias(struct perf_pmu_alias *old,
-                                 struct perf_pmu_alias *newalias)
-{
-       perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc);
-       perf_pmu_assign_str(old->name, "long_desc", &old->long_desc,
-                           &newalias->long_desc);
-       perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic);
-       perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str);
-       old->scale = newalias->scale;
-       old->per_pkg = newalias->per_pkg;
-       old->snapshot = newalias->snapshot;
-       memcpy(old->unit, newalias->unit, sizeof(old->unit));
-}
-
 /* Delete an alias entry. */
-void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
 {
        zfree(&newalias->name);
        zfree(&newalias->desc);
        zfree(&newalias->long_desc);
        zfree(&newalias->topic);
-       zfree(&newalias->str);
        zfree(&newalias->pmu_name);
        parse_events_terms__purge(&newalias->terms);
        free(newalias);
@@ -314,38 +418,99 @@ static void perf_pmu__del_aliases(struct perf_pmu *pmu)
        }
 }
 
-/* Merge an alias, search in alias list. If this name is already
- * present merge both of them to combine all information.
- */
-static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias,
-                                struct list_head *alist)
+static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu,
+                                                  const char *name,
+                                                  bool load)
 {
-       struct perf_pmu_alias *a;
+       struct perf_pmu_alias *alias;
 
-       list_for_each_entry(a, alist, list) {
-               if (!strcasecmp(newalias->name, a->name)) {
-                       if (newalias->pmu_name && a->pmu_name &&
-                           !strcasecmp(newalias->pmu_name, a->pmu_name)) {
-                               continue;
-                       }
-                       perf_pmu_update_alias(a, newalias);
-                       perf_pmu_free_alias(newalias);
-                       return true;
-               }
+       if (load && !pmu->sysfs_aliases_loaded)
+               pmu_aliases_parse(pmu);
+
+       list_for_each_entry(alias, &pmu->aliases, list) {
+               if (!strcasecmp(alias->name, name))
+                       return alias;
        }
-       return false;
+       return NULL;
 }
 
-static int __perf_pmu__new_alias(struct list_head *list, int dirfd, char *name,
-                                char *desc, char *val, const struct pmu_event *pe)
+static bool assign_str(const char *name, const char *field, char **old_str,
+                               const char *new_str)
+{
+       if (!*old_str && new_str) {
+               *old_str = strdup(new_str);
+               return true;
+       }
+
+       if (!new_str || !strcasecmp(*old_str, new_str))
+               return false; /* Nothing to update. */
+
+       pr_debug("alias %s differs in field '%s' ('%s' != '%s')\n",
+               name, field, *old_str, new_str);
+       zfree(old_str);
+       *old_str = strdup(new_str);
+       return true;
+}
+
+static void read_alias_info(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
+{
+       if (!alias->from_sysfs || alias->info_loaded)
+               return;
+
+       /*
+        * load unit name and scale if available
+        */
+       perf_pmu__parse_unit(pmu, alias);
+       perf_pmu__parse_scale(pmu, alias);
+       perf_pmu__parse_per_pkg(pmu, alias);
+       perf_pmu__parse_snapshot(pmu, alias);
+}
+
+struct update_alias_data {
+       struct perf_pmu *pmu;
+       struct perf_pmu_alias *alias;
+};
+
+static int update_alias(const struct pmu_event *pe,
+                       const struct pmu_events_table *table __maybe_unused,
+                       void *vdata)
+{
+       struct update_alias_data *data = vdata;
+       int ret = 0;
+
+       read_alias_info(data->pmu, data->alias);
+       assign_str(pe->name, "desc", &data->alias->desc, pe->desc);
+       assign_str(pe->name, "long_desc", &data->alias->long_desc, pe->long_desc);
+       assign_str(pe->name, "topic", &data->alias->topic, pe->topic);
+       data->alias->per_pkg = pe->perpkg;
+       if (pe->event) {
+               parse_events_terms__purge(&data->alias->terms);
+               ret = parse_events_terms(&data->alias->terms, pe->event, /*input=*/NULL);
+       }
+       if (!ret && pe->unit) {
+               char *unit;
+
+               ret = perf_pmu__convert_scale(pe->unit, &unit, &data->alias->scale);
+               if (!ret)
+                       snprintf(data->alias->unit, sizeof(data->alias->unit), "%s", unit);
+       }
+       return ret;
+}
+
+static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
+                               const char *desc, const char *val, FILE *val_fd,
+                               const struct pmu_event *pe)
 {
-       struct parse_events_term *term;
        struct perf_pmu_alias *alias;
        int ret;
-       char newval[256];
        const char *long_desc = NULL, *topic = NULL, *unit = NULL, *pmu_name = NULL;
        bool deprecated = false, perpkg = false;
 
+       if (perf_pmu__find_alias(pmu, name, /*load=*/ false)) {
+               /* Alias was already created/loaded. */
+               return 0;
+       }
+
        if (pe) {
                long_desc = pe->long_desc;
                topic = pe->topic;
@@ -366,80 +531,49 @@ static int __perf_pmu__new_alias(struct list_head *list, int dirfd, char *name,
        alias->snapshot = false;
        alias->deprecated = deprecated;
 
-       ret = parse_events_terms(&alias->terms, val);
+       ret = parse_events_terms(&alias->terms, val, val_fd);
        if (ret) {
                pr_err("Cannot parse alias %s: %d\n", val, ret);
                free(alias);
                return ret;
        }
 
-       /* Scan event and remove leading zeroes, spaces, newlines, some
-        * platforms have terms specified as
-        * event=0x0091 (read from files ../<PMU>/events/<FILE>
-        * and terms specified as event=0x91 (read from JSON files).
-        *
-        * Rebuild string to make alias->str member comparable.
-        */
-       memset(newval, 0, sizeof(newval));
-       ret = 0;
-       list_for_each_entry(term, &alias->terms, list) {
-               if (ret)
-                       ret += scnprintf(newval + ret, sizeof(newval) - ret,
-                                        ",");
-               if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
-                       ret += scnprintf(newval + ret, sizeof(newval) - ret,
-                                        "%s=%#x", term->config, term->val.num);
-               else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
-                       ret += scnprintf(newval + ret, sizeof(newval) - ret,
-                                        "%s=%s", term->config, term->val.str);
-       }
-
        alias->name = strdup(name);
-       if (dirfd >= 0) {
-               /*
-                * load unit name and scale if available
-                */
-               perf_pmu__parse_unit(alias, dirfd, name);
-               perf_pmu__parse_scale(alias, dirfd, name);
-               perf_pmu__parse_per_pkg(alias, dirfd, name);
-               perf_pmu__parse_snapshot(alias, dirfd, name);
-       }
-
        alias->desc = desc ? strdup(desc) : NULL;
        alias->long_desc = long_desc ? strdup(long_desc) :
                                desc ? strdup(desc) : NULL;
        alias->topic = topic ? strdup(topic) : NULL;
+       alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL;
        if (unit) {
-               if (perf_pmu__convert_scale(unit, (char **)&unit, &alias->scale) < 0)
+               if (perf_pmu__convert_scale(unit, (char **)&unit, &alias->scale) < 0) {
+                       perf_pmu_free_alias(alias);
                        return -1;
+               }
                snprintf(alias->unit, sizeof(alias->unit), "%s", unit);
        }
-       alias->str = strdup(newval);
-       alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL;
-
-       if (!perf_pmu_merge_alias(alias, list))
-               list_add_tail(&alias->list, list);
+       if (!pe) {
+               /* Update an event from sysfs with json data. */
+               struct update_alias_data data = {
+                       .pmu = pmu,
+                       .alias = alias,
+               };
+
+               alias->from_sysfs = true;
+               if (pmu->events_table) {
+                       if (pmu_events_table__find_event(pmu->events_table, pmu, name,
+                                                        update_alias, &data) == 0)
+                               pmu->loaded_json_aliases++;
+               }
+       }
 
+       if (!pe)
+               pmu->sysfs_aliases++;
+       else
+               pmu->loaded_json_aliases++;
+       list_add_tail(&alias->list, &pmu->aliases);
        return 0;
 }
 
-static int perf_pmu__new_alias(struct list_head *list, int dirfd, char *name, FILE *file)
-{
-       char buf[256];
-       int ret;
-
-       ret = fread(buf, 1, sizeof(buf), file);
-       if (ret == 0)
-               return -EINVAL;
-
-       buf[ret] = 0;
-
-       /* Remove trailing newline from sysfs file */
-       strim(buf);
-
-       return __perf_pmu__new_alias(list, dirfd, name, NULL, buf, NULL);
-}
-
 static inline bool pmu_alias_info_file(char *name)
 {
        size_t len;
@@ -458,18 +592,33 @@ static inline bool pmu_alias_info_file(char *name)
 }
 
 /*
- * Process all the sysfs attributes located under the directory
- * specified in 'dir' parameter.
+ * Reading the pmu event aliases definition, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes.
  */
-static int pmu_aliases_parse(int dirfd, struct list_head *head)
+static int pmu_aliases_parse(struct perf_pmu *pmu)
 {
+       char path[PATH_MAX];
        struct dirent *evt_ent;
        DIR *event_dir;
-       int fd;
+       size_t len;
+       int fd, dir_fd;
 
-       event_dir = fdopendir(dirfd);
-       if (!event_dir)
+       len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path));
+       if (!len)
+               return 0;
+       scnprintf(path + len, sizeof(path) - len, "%s/events", pmu->name);
+
+       dir_fd = open(path, O_DIRECTORY);
+       if (dir_fd == -1) {
+               pmu->sysfs_aliases_loaded = true;
+               return 0;
+       }
+
+       event_dir = fdopendir(dir_fd);
+       if (!event_dir){
+               close (dir_fd);
                return -EINVAL;
+       }
 
        while ((evt_ent = readdir(event_dir))) {
                char *name = evt_ent->d_name;
@@ -484,7 +633,7 @@ static int pmu_aliases_parse(int dirfd, struct list_head *head)
                if (pmu_alias_info_file(name))
                        continue;
 
-               fd = openat(dirfd, name, O_RDONLY);
+               fd = openat(dir_fd, name, O_RDONLY);
                if (fd == -1) {
                        pr_debug("Cannot open %s\n", name);
                        continue;
@@ -495,31 +644,15 @@ static int pmu_aliases_parse(int dirfd, struct list_head *head)
                        continue;
                }
 
-               if (perf_pmu__new_alias(head, dirfd, name, file) < 0)
+               if (perf_pmu__new_alias(pmu, name, /*desc=*/ NULL,
+                                       /*val=*/ NULL, file, /*pe=*/ NULL) < 0)
                        pr_debug("Cannot set up %s\n", name);
                fclose(file);
        }
 
        closedir(event_dir);
-       return 0;
-}
-
-/*
- * Reading the pmu event aliases definition, which should be located at:
- * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes.
- */
-static int pmu_aliases(int dirfd, const char *name, struct list_head *head)
-{
-       int fd;
-
-       fd = perf_pmu__pathname_fd(dirfd, name, "events", O_DIRECTORY);
-       if (fd < 0)
-               return 0;
-
-       /* it'll close the fd */
-       if (pmu_aliases_parse(fd, head))
-               return -1;
-
+       close (dir_fd);
+       pmu->sysfs_aliases_loaded = true;
        return 0;
 }
 
@@ -741,28 +874,13 @@ out:
        return res;
 }
 
-struct pmu_add_cpu_aliases_map_data {
-       /* List being added to. */
-       struct list_head *head;
-       /* If a pmu_event lacks a given PMU the default used. */
-       char *default_pmu_name;
-       /* The PMU that we're searching for events for. */
-       struct perf_pmu *pmu;
-};
-
 static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe,
                                        const struct pmu_events_table *table __maybe_unused,
                                        void *vdata)
 {
-       struct pmu_add_cpu_aliases_map_data *data = vdata;
-       const char *pname = pe->pmu ?: data->default_pmu_name;
+       struct perf_pmu *pmu = vdata;
 
-       if (!strcmp(pname, data->pmu->name) ||
-           (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->pmu->name))) {
-               /* need type casts to override 'const' */
-               __perf_pmu__new_alias(data->head, -1, (char *)pe->name, (char *)pe->desc,
-                                     (char *)pe->event, pe);
-       }
+       perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, pe);
        return 0;
 }
 
@@ -770,68 +888,51 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe,
  * From the pmu_events_table, find the events that correspond to the given
  * PMU and add them to the list 'head'.
  */
-void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu,
-                       const struct pmu_events_table *table)
+void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_table *table)
 {
-       struct pmu_add_cpu_aliases_map_data data = {
-               .head = head,
-               .default_pmu_name = perf_pmus__default_pmu_name(),
-               .pmu = pmu,
-       };
-
-       pmu_events_table_for_each_event(table, pmu_add_cpu_aliases_map_callback, &data);
-       free(data.default_pmu_name);
+       pmu_events_table__for_each_event(table, pmu, pmu_add_cpu_aliases_map_callback, pmu);
 }
 
-static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+static void pmu_add_cpu_aliases(struct perf_pmu *pmu)
 {
-       const struct pmu_events_table *table;
+       if (!pmu->events_table)
+               return;
 
-       table = perf_pmu__find_events_table(pmu);
-       if (!table)
+       if (pmu->cpu_aliases_added)
                return;
 
-       pmu_add_cpu_aliases_table(head, pmu, table);
+       pmu_add_cpu_aliases_table(pmu, pmu->events_table);
+       pmu->cpu_aliases_added = true;
 }
 
-struct pmu_sys_event_iter_data {
-       struct list_head *head;
-       struct perf_pmu *pmu;
-};
-
 static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe,
                                       const struct pmu_events_table *table __maybe_unused,
-                                      void *data)
+                                      void *vdata)
 {
-       struct pmu_sys_event_iter_data *idata = data;
-       struct perf_pmu *pmu = idata->pmu;
+       struct perf_pmu *pmu = vdata;
 
        if (!pe->compat || !pe->pmu)
                return 0;
 
        if (!strcmp(pmu->id, pe->compat) &&
            pmu_uncore_alias_match(pe->pmu, pmu->name)) {
-               __perf_pmu__new_alias(idata->head, -1,
-                                     (char *)pe->name,
-                                     (char *)pe->desc,
-                                     (char *)pe->event,
-                                     pe);
+               perf_pmu__new_alias(pmu,
+                               pe->name,
+                               pe->desc,
+                               pe->event,
+                               /*val_fd=*/ NULL,
+                               pe);
        }
 
        return 0;
 }
 
-void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu)
+void pmu_add_sys_aliases(struct perf_pmu *pmu)
 {
-       struct pmu_sys_event_iter_data idata = {
-               .head = head,
-               .pmu = pmu,
-       };
-
        if (!pmu->id)
                return;
 
-       pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, &idata);
+       pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, pmu);
 }
 
 struct perf_event_attr * __weak
@@ -840,13 +941,13 @@ perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
        return NULL;
 }
 
-char * __weak
+const char * __weak
 pmu_find_real_name(const char *name)
 {
-       return (char *)name;
+       return name;
 }
 
-char * __weak
+const char * __weak
 pmu_find_alias_name(const char *name __maybe_unused)
 {
        return NULL;
@@ -863,40 +964,41 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
 struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name)
 {
        struct perf_pmu *pmu;
-       LIST_HEAD(format);
-       LIST_HEAD(aliases);
        __u32 type;
-       char *name = pmu_find_real_name(lookup_name);
-       char *alias_name;
-
-       /*
-        * The pmu data we store & need consists of the pmu
-        * type value and format definitions. Load both right
-        * now.
-        */
-       if (pmu_format(dirfd, name, &format))
-               return NULL;
-
-       /*
-        * Check the aliases first to avoid unnecessary work.
-        */
-       if (pmu_aliases(dirfd, name, &aliases))
-               return NULL;
+       const char *name = pmu_find_real_name(lookup_name);
+       const char *alias_name;
 
        pmu = zalloc(sizeof(*pmu));
        if (!pmu)
                return NULL;
 
-       pmu->is_core = is_pmu_core(name);
-       pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
        pmu->name = strdup(name);
        if (!pmu->name)
                goto err;
 
-       /* Read type, and ensure that type value is successfully assigned (return 1) */
+       /*
+        * Read type early to fail fast if a lookup name isn't a PMU. Ensure
+        * that type value is successfully assigned (return 1).
+        */
        if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &type) != 1)
                goto err;
 
+       INIT_LIST_HEAD(&pmu->format);
+       INIT_LIST_HEAD(&pmu->aliases);
+       INIT_LIST_HEAD(&pmu->caps);
+
+       /*
+        * The pmu data we store & need consists of the pmu
+        * type value and format definitions. Load both right
+        * now.
+        */
+       if (pmu_format(pmu, dirfd, name)) {
+               free(pmu);
+               return NULL;
+       }
+       pmu->is_core = is_pmu_core(name);
+       pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
+
        alias_name = pmu_find_alias_name(name);
        if (alias_name) {
                pmu->alias_name = strdup(alias_name);
@@ -909,14 +1011,8 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
        if (pmu->is_uncore)
                pmu->id = pmu_id(name);
        pmu->max_precise = pmu_max_precise(dirfd, pmu);
-       pmu_add_cpu_aliases(&aliases, pmu);
-       pmu_add_sys_aliases(&aliases, pmu);
-
-       INIT_LIST_HEAD(&pmu->format);
-       INIT_LIST_HEAD(&pmu->aliases);
-       INIT_LIST_HEAD(&pmu->caps);
-       list_splice(&format, &pmu->format);
-       list_splice(&aliases, &pmu->aliases);
+       pmu->events_table = perf_pmu__find_events_table(pmu);
+       pmu_add_sys_aliases(pmu);
        list_add_tail(&pmu->list, pmus);
 
        pmu->default_config = perf_pmu__get_default_config(pmu);
@@ -966,13 +1062,15 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu)
        if (pmu == &perf_pmu__fake)
                return;
 
-       list_for_each_entry(format, &pmu->format, list)
+       list_for_each_entry(format, &pmu->format, list) {
+               perf_pmu_format__load(pmu, format);
                if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) {
                        pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'"
                                   "which is not supported by this version of perf!\n",
                                   pmu->name, format->name, format->value);
                        return;
                }
+       }
 }
 
 bool evsel__is_aux_event(const struct evsel *evsel)
@@ -1000,7 +1098,7 @@ void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
        if (term)
                user_bits = term->val.cfg_chg;
 
-       bits = perf_pmu__format_bits(&pmu->format, config_name);
+       bits = perf_pmu__format_bits(pmu, config_name);
 
        /* Do nothing if the user changed the value */
        if (bits & user_bits)
@@ -1023,9 +1121,9 @@ pmu_find_format(struct list_head *formats, const char *name)
        return NULL;
 }
 
-__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
+__u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name)
 {
-       struct perf_pmu_format *format = pmu_find_format(formats, name);
+       struct perf_pmu_format *format = pmu_find_format(&pmu->format, name);
        __u64 bits = 0;
        int fbit;
 
@@ -1038,13 +1136,14 @@ __u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
        return bits;
 }
 
-int perf_pmu__format_type(struct list_head *formats, const char *name)
+int perf_pmu__format_type(struct perf_pmu *pmu, const char *name)
 {
-       struct perf_pmu_format *format = pmu_find_format(formats, name);
+       struct perf_pmu_format *format = pmu_find_format(&pmu->format, name);
 
        if (!format)
                return -1;
 
+       perf_pmu_format__load(pmu, format);
        return format->value;
 }
 
@@ -1135,8 +1234,7 @@ error:
  * Setup one of config[12] attr members based on the
  * user input data - term parameter.
  */
-static int pmu_config_term(const char *pmu_name,
-                          struct list_head *formats,
+static int pmu_config_term(struct perf_pmu *pmu,
                           struct perf_event_attr *attr,
                           struct parse_events_term *term,
                           struct list_head *head_terms,
@@ -1160,15 +1258,15 @@ static int pmu_config_term(const char *pmu_name,
        if (parse_events__is_hardcoded_term(term))
                return 0;
 
-       format = pmu_find_format(formats, term->config);
+       format = pmu_find_format(&pmu->format, term->config);
        if (!format) {
-               char *pmu_term = pmu_formats_string(formats);
+               char *pmu_term = pmu_formats_string(&pmu->format);
                char *unknown_term;
                char *help_msg;
 
                if (asprintf(&unknown_term,
                                "unknown term '%s' for pmu '%s'",
-                               term->config, pmu_name) < 0)
+                               term->config, pmu->name) < 0)
                        unknown_term = NULL;
                help_msg = parse_events_formats_error_string(pmu_term);
                if (err) {
@@ -1182,7 +1280,7 @@ static int pmu_config_term(const char *pmu_name,
                free(pmu_term);
                return -EINVAL;
        }
-
+       perf_pmu_format__load(pmu, format);
        switch (format->value) {
        case PERF_PMU_FORMAT_VALUE_CONFIG:
                vp = &attr->config;
@@ -1259,7 +1357,7 @@ static int pmu_config_term(const char *pmu_name,
        return 0;
 }
 
-int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
+int perf_pmu__config_terms(struct perf_pmu *pmu,
                           struct perf_event_attr *attr,
                           struct list_head *head_terms,
                           bool zero, struct parse_events_error *err)
@@ -1267,8 +1365,7 @@ int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
        struct parse_events_term *term;
 
        list_for_each_entry(term, head_terms, list) {
-               if (pmu_config_term(pmu_name, formats, attr, term, head_terms,
-                                   zero, err))
+               if (pmu_config_term(pmu, attr, term, head_terms, zero, err))
                        return -EINVAL;
        }
 
@@ -1286,25 +1383,25 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 {
        bool zero = !!pmu->default_config;
 
-       return perf_pmu__config_terms(pmu->name, &pmu->format, attr,
-                                     head_terms, zero, err);
+       return perf_pmu__config_terms(pmu, attr, head_terms, zero, err);
 }
 
 static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
                                             struct parse_events_term *term)
 {
        struct perf_pmu_alias *alias;
-       char *name;
+       const char *name;
 
        if (parse_events__is_hardcoded_term(term))
                return NULL;
 
        if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
-               if (term->val.num != 1)
+               if (!term->no_value)
                        return NULL;
                if (pmu_find_format(&pmu->format, term->config))
                        return NULL;
                name = term->config;
+
        } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
                if (strcasecmp(term->config, "event"))
                        return NULL;
@@ -1313,26 +1410,51 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
                return NULL;
        }
 
-       list_for_each_entry(alias, &pmu->aliases, list) {
-               if (!strcasecmp(alias->name, name))
-                       return alias;
+       alias = perf_pmu__find_alias(pmu, name, /*load=*/ true);
+       if (alias || pmu->cpu_aliases_added)
+               return alias;
+
+       /* Alias doesn't exist, try to get it from the json events. */
+       if (pmu->events_table &&
+           pmu_events_table__find_event(pmu->events_table, pmu, name,
+                                        pmu_add_cpu_aliases_map_callback,
+                                        pmu) == 0) {
+               alias = perf_pmu__find_alias(pmu, name, /*load=*/ false);
        }
-       return NULL;
+       return alias;
 }
 
 
-static int check_info_data(struct perf_pmu_alias *alias,
-                          struct perf_pmu_info *info)
+static int check_info_data(struct perf_pmu *pmu,
+                          struct perf_pmu_alias *alias,
+                          struct perf_pmu_info *info,
+                          struct parse_events_error *err,
+                          int column)
 {
+       read_alias_info(pmu, alias);
        /*
         * Only one term in event definition can
         * define unit, scale and snapshot, fail
         * if there's more than one.
         */
-       if ((info->unit && alias->unit[0]) ||
-           (info->scale && alias->scale) ||
-           (info->snapshot && alias->snapshot))
+       if (info->unit && alias->unit[0]) {
+               parse_events_error__handle(err, column,
+                                       strdup("Attempt to set event's unit twice"),
+                                       NULL);
                return -EINVAL;
+       }
+       if (info->scale && alias->scale) {
+               parse_events_error__handle(err, column,
+                                       strdup("Attempt to set event's scale twice"),
+                                       NULL);
+               return -EINVAL;
+       }
+       if (info->snapshot && alias->snapshot) {
+               parse_events_error__handle(err, column,
+                                       strdup("Attempt to set event snapshot twice"),
+                                       NULL);
+               return -EINVAL;
+       }
 
        if (alias->unit[0])
                info->unit = alias->unit;
@@ -1351,7 +1473,7 @@ static int check_info_data(struct perf_pmu_alias *alias,
  * defined for the alias
  */
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
-                         struct perf_pmu_info *info)
+                         struct perf_pmu_info *info, struct parse_events_error *err)
 {
        struct parse_events_term *term, *h;
        struct perf_pmu_alias *alias;
@@ -1372,10 +1494,14 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
                if (!alias)
                        continue;
                ret = pmu_alias_terms(alias, &term->list);
-               if (ret)
+               if (ret) {
+                       parse_events_error__handle(err, term->err_term,
+                                               strdup("Failure to duplicate terms"),
+                                               NULL);
                        return ret;
+               }
 
-               ret = check_info_data(alias, info);
+               ret = check_info_data(pmu, alias, info, err, term->err_term);
                if (ret)
                        return ret;
 
@@ -1400,36 +1526,36 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
        return 0;
 }
 
-int perf_pmu__new_format(struct list_head *list, char *name,
-                        int config, unsigned long *bits)
-{
-       struct perf_pmu_format *format;
+struct find_event_args {
+       const char *event;
+       void *state;
+       pmu_event_callback cb;
+};
 
-       format = zalloc(sizeof(*format));
-       if (!format)
-               return -ENOMEM;
+static int find_event_callback(void *state, struct pmu_event_info *info)
+{
+       struct find_event_args *args = state;
 
-       format->name = strdup(name);
-       format->value = config;
-       memcpy(format->bits, bits, sizeof(format->bits));
+       if (!strcmp(args->event, info->name))
+               return args->cb(args->state, info);
 
-       list_add_tail(&format->list, list);
        return 0;
 }
 
-void perf_pmu__set_format(unsigned long *bits, long from, long to)
+int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb)
 {
-       long b;
-
-       if (!to)
-               to = from;
+       struct find_event_args args = {
+               .event = event,
+               .state = state,
+               .cb = cb,
+       };
 
-       memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
-       for (b = from; b <= to; b++)
-               __set_bit(b, bits);
+       /* Sub-optimal, but function is only used by tests. */
+       return perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ false,
+                                       &args, find_event_callback);
 }
 
-void perf_pmu__del_formats(struct list_head *formats)
+static void perf_pmu__del_formats(struct list_head *formats)
 {
        struct perf_pmu_format *fmt, *tmp;
 
@@ -1466,15 +1592,145 @@ bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu)
        return !pmu->is_core || perf_pmus__num_core_pmus() == 1;
 }
 
-bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name)
+bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name)
 {
-       struct perf_pmu_alias *alias;
+       if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL)
+               return true;
+       if (pmu->cpu_aliases_added || !pmu->events_table)
+               return false;
+       return pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0;
+}
 
-       list_for_each_entry(alias, &pmu->aliases, list) {
-               if (!strcmp(alias->name, name))
-                       return true;
+size_t perf_pmu__num_events(struct perf_pmu *pmu)
+{
+       size_t nr;
+
+       if (!pmu->sysfs_aliases_loaded)
+               pmu_aliases_parse(pmu);
+
+       nr = pmu->sysfs_aliases;
+
+       if (pmu->cpu_aliases_added)
+                nr += pmu->loaded_json_aliases;
+       else if (pmu->events_table)
+               nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->loaded_json_aliases;
+
+       return pmu->selectable ? nr + 1 : nr;
+}
+
+static int sub_non_neg(int a, int b)
+{
+       if (b > a)
+               return 0;
+       return a - b;
+}
+
+static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
+                         const struct perf_pmu_alias *alias, bool skip_duplicate_pmus)
+{
+       struct parse_events_term *term;
+       int pmu_name_len = skip_duplicate_pmus
+               ? pmu_name_len_no_suffix(pmu->name, /*num=*/NULL)
+               : (int)strlen(pmu->name);
+       int used = snprintf(buf, len, "%.*s/%s", pmu_name_len, pmu->name, alias->name);
+
+       list_for_each_entry(term, &alias->terms, list) {
+               if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+                       used += snprintf(buf + used, sub_non_neg(len, used),
+                                       ",%s=%s", term->config,
+                                       term->val.str);
        }
-       return false;
+
+       if (sub_non_neg(len, used) > 0) {
+               buf[used] = '/';
+               used++;
+       }
+       if (sub_non_neg(len, used) > 0) {
+               buf[used] = '\0';
+               used++;
+       } else
+               buf[len - 1] = '\0';
+
+       return buf;
+}
+
+int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
+                            void *state, pmu_event_callback cb)
+{
+       char buf[1024];
+       struct perf_pmu_alias *event;
+       struct pmu_event_info info = {
+               .pmu = pmu,
+       };
+       int ret = 0;
+       struct strbuf sb;
+
+       strbuf_init(&sb, /*hint=*/ 0);
+       pmu_add_cpu_aliases(pmu);
+       list_for_each_entry(event, &pmu->aliases, list) {
+               size_t buf_used;
+
+               info.pmu_name = event->pmu_name ?: pmu->name;
+               info.alias = NULL;
+               if (event->desc) {
+                       info.name = event->name;
+                       buf_used = 0;
+               } else {
+                       info.name = format_alias(buf, sizeof(buf), pmu, event,
+                                                skip_duplicate_pmus);
+                       if (pmu->is_core) {
+                               info.alias = info.name;
+                               info.name = event->name;
+                       }
+                       buf_used = strlen(buf) + 1;
+               }
+               info.scale_unit = NULL;
+               if (strlen(event->unit) || event->scale != 1.0) {
+                       info.scale_unit = buf + buf_used;
+                       buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+                                       "%G%s", event->scale, event->unit) + 1;
+               }
+               info.desc = event->desc;
+               info.long_desc = event->long_desc;
+               info.encoding_desc = buf + buf_used;
+               parse_events_term__to_strbuf(&event->terms, &sb);
+               buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+                               "%s/%s/", info.pmu_name, sb.buf) + 1;
+               info.topic = event->topic;
+               info.str = sb.buf;
+               info.deprecated = event->deprecated;
+               ret = cb(state, &info);
+               if (ret)
+                       goto out;
+               strbuf_setlen(&sb, /*len=*/ 0);
+       }
+       if (pmu->selectable) {
+               info.name = buf;
+               snprintf(buf, sizeof(buf), "%s//", pmu->name);
+               info.alias = NULL;
+               info.scale_unit = NULL;
+               info.desc = NULL;
+               info.long_desc = NULL;
+               info.encoding_desc = NULL;
+               info.topic = NULL;
+               info.pmu_name = pmu->name;
+               info.deprecated = false;
+               ret = cb(state, &info);
+       }
+out:
+       strbuf_release(&sb);
+       return ret;
+}
+
+bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name)
+{
+       return !strcmp(pmu->name, pmu_name) ||
+               (pmu->is_uncore && pmu_uncore_alias_match(pmu_name, pmu->name)) ||
+               /*
+                * jevents and tests use default_core as a marker for any core
+                * PMU as the PMU name varies across architectures.
+                */
+               (pmu->is_core && !strcmp(pmu_name, "default_core"));
 }
 
 bool perf_pmu__is_software(const struct perf_pmu *pmu)
@@ -1710,7 +1966,7 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
                   name ?: "N/A", buf, config_name, config);
 }
 
-int perf_pmu__match(char *pattern, char *name, char *tok)
+int perf_pmu__match(const char *pattern, const char *name, const char *tok)
 {
        if (!name)
                return -1;
@@ -1756,17 +2012,19 @@ int perf_pmu__event_source_devices_fd(void)
  * then pathname will be filled with
  * "/sys/bus/event_source/devices/cs_etm/format"
  *
- * Return 0 if the sysfs mountpoint couldn't be found or if no
- * characters were written.
+ * Return 0 if the sysfs mountpoint couldn't be found, if no characters were
+ * written or if the buffer size is exceeded.
  */
 int perf_pmu__pathname_scnprintf(char *buf, size_t size,
                                 const char *pmu_name, const char *filename)
 {
-       char base_path[PATH_MAX];
+       size_t len;
 
-       if (!perf_pmu__event_source_devices_scnprintf(base_path, sizeof(base_path)))
+       len = perf_pmu__event_source_devices_scnprintf(buf, size);
+       if (!len || (len + strlen(pmu_name) + strlen(filename) + 1)  >= size)
                return 0;
-       return scnprintf(buf, size, "%s%s/%s", base_path, pmu_name, filename);
+
+       return scnprintf(buf + len, size - len, "%s/%s", pmu_name, filename);
 }
 
 int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags)
@@ -1788,5 +2046,23 @@ void perf_pmu__delete(struct perf_pmu *pmu)
        zfree(&pmu->default_config);
        zfree(&pmu->name);
        zfree(&pmu->alias_name);
+       zfree(&pmu->id);
        free(pmu);
 }
+
+struct perf_pmu *pmu__find_core_pmu(void)
+{
+       struct perf_pmu *pmu = NULL;
+
+       while ((pmu = perf_pmus__scan_core(pmu))) {
+               /*
+                * The cpumap should cover all CPUs. Otherwise, some CPUs may
+                * not support some events or have different event IDs.
+                */
+               if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu)
+                       return NULL;
+
+               return pmu;
+       }
+       return NULL;
+}
index 6b414ce..6a4e170 100644 (file)
@@ -39,7 +39,7 @@ struct perf_pmu_caps {
  */
 struct perf_pmu {
        /** @name: The name of the PMU such as "cpu". */
-       char *name;
+       const char *name;
        /**
         * @alias_name: Optional alternate name for the PMU determined in
         * architecture specific code.
@@ -49,7 +49,7 @@ struct perf_pmu {
         * @id: Optional PMU identifier read from
         * <sysfs>/bus/event_source/devices/<name>/identifier.
         */
-       char *id;
+       const char *id;
        /**
         * @type: Perf event attributed type value, read from
         * <sysfs>/bus/event_source/devices/<name>/type.
@@ -114,6 +114,21 @@ struct perf_pmu {
         * from json events in pmu-events.c.
         */
        struct list_head aliases;
+       /**
+        * @events_table: The events table for json events in pmu-events.c.
+        */
+       const struct pmu_events_table *events_table;
+       /** @sysfs_aliases: Number of sysfs aliases loaded. */
+       uint32_t sysfs_aliases;
+       /** @loaded_json_aliases: Number of json event aliases loaded. */
+       uint32_t loaded_json_aliases;
+       /** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */
+       bool sysfs_aliases_loaded;
+       /**
+        * @cpu_aliases_added: Have all json events table entries for the PMU
+        * been added?
+        */
+       bool cpu_aliases_added;
        /** @caps_initialized: Has the list caps been initialized? */
        bool caps_initialized;
        /** @nr_caps: The length of the list caps. */
@@ -158,88 +173,49 @@ struct perf_pmu_info {
        bool snapshot;
 };
 
-#define UNIT_MAX_LEN   31 /* max length for event unit name */
-
-/**
- * struct perf_pmu_alias - An event either read from sysfs or builtin in
- * pmu-events.c, created by parsing the pmu-events json files.
- */
-struct perf_pmu_alias {
-       /** @name: Name of the event like "mem-loads". */
-       char *name;
-       /** @desc: Optional short description of the event. */
-       char *desc;
-       /** @long_desc: Optional long description. */
-       char *long_desc;
-       /**
-        * @topic: Optional topic such as cache or pipeline, particularly for
-        * json events.
-        */
-       char *topic;
-       /**
-        * @str: Comma separated parameter list like
-        * "event=0xcd,umask=0x1,ldlat=0x3".
-        */
-       char *str;
-       /** @terms: Owned list of the original parsed parameters. */
-       struct list_head terms;
-       /** @list: List element of struct perf_pmu aliases. */
-       struct list_head list;
-       /** @unit: Units for the event, such as bytes or cache lines. */
-       char unit[UNIT_MAX_LEN+1];
-       /** @scale: Value to scale read counter values by. */
-       double scale;
-       /**
-        * @per_pkg: Does the file
-        * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or
-        * equivalent json value exist and have the value 1.
-        */
-       bool per_pkg;
-       /**
-        * @snapshot: Does the file
-        * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.snapshot
-        * exist and have the value 1.
-        */
-       bool snapshot;
-       /**
-        * @deprecated: Is the event hidden and so not shown in perf list by
-        * default.
-        */
+struct pmu_event_info {
+       const struct perf_pmu *pmu;
+       const char *name;
+       const char* alias;
+       const char *scale_unit;
+       const char *desc;
+       const char *long_desc;
+       const char *encoding_desc;
+       const char *topic;
+       const char *pmu_name;
+       const char *str;
        bool deprecated;
-       /**
-        * @pmu_name: The name copied from the json struct pmu_event. This can
-        * differ from the PMU name as it won't have suffixes.
-        */
-       char *pmu_name;
 };
 
-void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu);
+typedef int (*pmu_event_callback)(void *state, struct pmu_event_info *info);
+
+void pmu_add_sys_aliases(struct perf_pmu *pmu);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
                     struct list_head *head_terms,
                     struct parse_events_error *error);
-int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
+int perf_pmu__config_terms(struct perf_pmu *pmu,
                           struct perf_event_attr *attr,
                           struct list_head *head_terms,
                           bool zero, struct parse_events_error *error);
-__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
-int perf_pmu__format_type(struct list_head *formats, const char *name);
+__u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name);
+int perf_pmu__format_type(struct perf_pmu *pmu, const char *name);
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
-                         struct perf_pmu_info *info);
-struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
-                                 struct list_head *head_terms);
-void perf_pmu_error(struct list_head *list, char *name, void *scanner, char const *msg);
+                         struct perf_pmu_info *info, struct parse_events_error *err);
+int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb);
 
-int perf_pmu__new_format(struct list_head *list, char *name,
-                        int config, unsigned long *bits);
-void perf_pmu__set_format(unsigned long *bits, long from, long to);
-int perf_pmu__format_parse(int dirfd, struct list_head *head);
-void perf_pmu__del_formats(struct list_head *formats);
+int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load);
+void perf_pmu_format__set_value(void *format, int config, unsigned long *bits);
 bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name);
 
 bool is_pmu_core(const char *name);
 bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu);
 bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu);
-bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name);
+bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name);
+size_t perf_pmu__num_events(struct perf_pmu *pmu);
+int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
+                            void *state, pmu_event_callback cb);
+bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name);
+
 /**
  * perf_pmu_is_software - is the PMU a software PMU as in it uses the
  *                        perf_sw_context in the kernel?
@@ -258,13 +234,12 @@ bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name);
 int perf_pmu__test(void);
 
 struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
-void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu,
+void pmu_add_cpu_aliases_table(struct perf_pmu *pmu,
                               const struct pmu_events_table *table);
 
 char *perf_pmu__getcpuid(struct perf_pmu *pmu);
 const struct pmu_events_table *pmu_events_table__find(void);
 const struct pmu_metrics_table *pmu_metrics_table__find(void);
-void perf_pmu_free_alias(struct perf_pmu_alias *alias);
 
 int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
 
@@ -275,10 +250,10 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
                                   const char *config_name);
 void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu);
 
-int perf_pmu__match(char *pattern, char *name, char *tok);
+int perf_pmu__match(const char *pattern, const char *name, const char *tok);
 
-char *pmu_find_real_name(const char *name);
-char *pmu_find_alias_name(const char *name);
+const char *pmu_find_real_name(const char *name);
+const char *pmu_find_alias_name(const char *name);
 double perf_pmu__cpu_slots_per_cycle(void);
 int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size);
 int perf_pmu__pathname_scnprintf(char *buf, size_t size,
@@ -289,5 +264,6 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename,
 struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name);
 struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus);
 void perf_pmu__delete(struct perf_pmu *pmu);
+struct perf_pmu *pmu__find_core_pmu(void);
 
 #endif /* __PMU_H */
index dff4e89..600c8c1 100644 (file)
@@ -1,6 +1,5 @@
 %define api.pure full
-%parse-param {struct list_head *format}
-%parse-param {char *name}
+%parse-param {void *format}
 %parse-param {void *scanner}
 %lex-param {void* scanner}
 
@@ -11,6 +10,9 @@
 #include <linux/bitmap.h>
 #include <string.h>
 #include "pmu.h"
+#include "pmu-bison.h"
+
+int perf_pmu_lex(YYSTYPE * yylval_param , void *yyscanner);
 
 #define ABORT_ON(val) \
 do { \
@@ -18,6 +20,20 @@ do { \
                 YYABORT; \
 } while (0)
 
+static void perf_pmu_error(void *format, void *scanner, const char *msg);
+
+static void perf_pmu__set_format(unsigned long *bits, long from, long to)
+{
+       long b;
+
+       if (!to)
+               to = from;
+
+       memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
+       for (b = from; b <= to; b++)
+               __set_bit(b, bits);
+}
+
 %}
 
 %token PP_CONFIG
@@ -42,16 +58,12 @@ format_term
 format_term:
 PP_CONFIG ':' bits
 {
-       ABORT_ON(perf_pmu__new_format(format, name,
-                                     PERF_PMU_FORMAT_VALUE_CONFIG,
-                                     $3));
+       perf_pmu_format__set_value(format, PERF_PMU_FORMAT_VALUE_CONFIG, $3);
 }
 |
 PP_CONFIG PP_VALUE ':' bits
 {
-       ABORT_ON(perf_pmu__new_format(format, name,
-                                     $2,
-                                     $4));
+       perf_pmu_format__set_value(format, $2, $4);
 }
 
 bits:
@@ -78,9 +90,8 @@ PP_VALUE
 
 %%
 
-void perf_pmu_error(struct list_head *list __maybe_unused,
-                   char *name __maybe_unused,
-                   void *scanner __maybe_unused,
-                   char const *msg __maybe_unused)
+static void perf_pmu_error(void *format __maybe_unused,
+                          void *scanner __maybe_unused,
+                          const char *msg __maybe_unused)
 {
 }
index c58ba9f..6631367 100644 (file)
@@ -1,8 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/string.h>
 #include <linux/zalloc.h>
 #include <subcmd/pager.h>
 #include <sys/types.h>
+#include <ctype.h>
 #include <dirent.h>
 #include <pthread.h>
 #include <string.h>
@@ -33,6 +36,31 @@ static LIST_HEAD(other_pmus);
 static bool read_sysfs_core_pmus;
 static bool read_sysfs_all_pmus;
 
+int pmu_name_len_no_suffix(const char *str, unsigned long *num) /* Length of str without a trailing "_<digits>" suffix ("uncore_" names only). */
+{
+       int orig_len, len;
+
+       orig_len = len = strlen(str);
+
+       /* Non-uncore PMUs have their full length, for example, i915. */
+       if (!strstarts(str, "uncore_"))
+               return len; /* *num is left untouched on this path. */
+
+       /*
+        * Count trailing digits and '_', if '_{num}' suffix isn't present use
+        * the full length.
+        */
+       while (len > 0 && isdigit(str[len - 1]))
+               len--;
+
+       if (len > 0 && len != orig_len && str[len - 1] == '_') {
+               if (num) /* num is optional; when given it receives the decimal suffix value. */
+                       *num = strtoul(&str[len], NULL, 10);
+               return len - 1; /* Exclude the '_' separator as well. */
+       }
+       return orig_len; /* No "_<digits>" suffix; *num is left untouched. */
+}
+
 void perf_pmus__destroy(void)
 {
        struct perf_pmu *pmu, *tmp;
@@ -122,6 +150,25 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
        return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name);
 }
 
+static int pmus_cmp(void *priv __maybe_unused, /* list_sort() comparator: order by suffix-less name, then by numeric suffix. */
+                   const struct list_head *lhs, const struct list_head *rhs)
+{
+       unsigned long lhs_num = 0, rhs_num = 0; /* Stay 0 when a name has no numeric suffix. */
+       struct perf_pmu *lhs_pmu = container_of(lhs, struct perf_pmu, list);
+       struct perf_pmu *rhs_pmu = container_of(rhs, struct perf_pmu, list);
+       const char *lhs_pmu_name = lhs_pmu->name ?: ""; /* A NULL name compares as the empty string. */
+       const char *rhs_pmu_name = rhs_pmu->name ?: "";
+       int lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name, &lhs_num);
+       int rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name, &rhs_num);
+       int ret = strncmp(lhs_pmu_name, rhs_pmu_name, /* Compare only over the shorter suffix-less length. */
+                       lhs_pmu_name_len < rhs_pmu_name_len ? lhs_pmu_name_len : rhs_pmu_name_len);
+
+       if (lhs_pmu_name_len != rhs_pmu_name_len || ret != 0 || lhs_pmu_name_len == 0)
+               return ret; /* NOTE(review): if one stem is a prefix of the other, ret is 0 although the names differ - confirm intended. */
+
+       return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0); /* Same stem: order by numeric suffix. */
+}
+
 /* Add all pmus in sysfs to pmu list: */
 static void pmu_read_sysfs(bool core_only)
 {
@@ -156,6 +203,8 @@ static void pmu_read_sysfs(bool core_only)
                if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
                        pr_err("Failure to set up any core PMUs\n");
        }
+       list_sort(NULL, &core_pmus, pmus_cmp);
+       list_sort(NULL, &other_pmus, pmus_cmp);
        if (!list_empty(&core_pmus)) {
                read_sysfs_core_pmus = true;
                if (!core_only)
@@ -227,6 +276,43 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
        return NULL;
 }
 
+static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) /* Next PMU after 'pmu' whose suffix-less name differs from that of 'pmu'; NULL at the end. */
+{
+       bool use_core_pmus = !pmu || pmu->is_core;
+       int last_pmu_name_len = 0;
+       const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : "";
+
+       if (!pmu) { /* Start of iteration: read sysfs PMUs and begin at the head of core_pmus. */
+               pmu_read_sysfs(/*core_only=*/false);
+               pmu = list_prepare_entry(pmu, &core_pmus, list);
+       } else
+               last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", NULL);
+
+       if (use_core_pmus) {
+               list_for_each_entry_continue(pmu, &core_pmus, list) {
+                       int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", /*num=*/NULL);
+
+                       if (last_pmu_name_len == pmu_name_len &&
+                           !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len))
+                               continue; /* Same stem as the PMU passed in: skip it. */
+
+                       return pmu;
+               }
+               pmu = NULL; /* core_pmus exhausted: continue from the head of other_pmus. */
+               pmu = list_prepare_entry(pmu, &other_pmus, list);
+       }
+       list_for_each_entry_continue(pmu, &other_pmus, list) {
+               int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", /*num=*/NULL);
+
+               if (last_pmu_name_len == pmu_name_len &&
+                   !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len))
+                       continue; /* Same stem as the PMU passed in: skip it. */
+
+               return pmu;
+       }
+       return NULL; /* Both lists exhausted. */
+}
+
 const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
 {
        struct perf_pmu *pmu = NULL;
@@ -258,219 +344,153 @@ int __weak perf_pmus__num_mem_pmus(void)
 struct sevent {
        /** PMU for event. */
        const struct perf_pmu *pmu;
-       /**
-        * Optional event for name, desc, etc. If not present then this is a
-        * selectable PMU and the event name is shown as "//".
-        */
-       const struct perf_pmu_alias *event;
-       /** Is the PMU for the CPU? */
-       bool is_cpu;
+       const char *name; /* This and the strings below: strdup()ed copies of the same-named pmu_event_info fields, or NULL. */
+       const char* alias;
+       const char *scale_unit;
+       const char *desc;
+       const char *long_desc;
+       const char *encoding_desc;
+       const char *topic;
+       const char *pmu_name;
+       bool deprecated; /* Copied from the pmu_event_info 'deprecated' field. */
 };
 
 static int cmp_sevent(const void *a, const void *b)
 {
        const struct sevent *as = a;
        const struct sevent *bs = b;
-       const char *a_pmu_name = NULL, *b_pmu_name = NULL;
-       const char *a_name = "//", *a_desc = NULL, *a_topic = "";
-       const char *b_name = "//", *b_desc = NULL, *b_topic = "";
+       bool a_iscpu, b_iscpu; /* qsort() comparator keys, in order: desc presence, topic, core-ness, PMU name, event name. */
        int ret;
 
-       if (as->event) {
-               a_name = as->event->name;
-               a_desc = as->event->desc;
-               a_topic = as->event->topic ?: "";
-               a_pmu_name = as->event->pmu_name;
-       }
-       if (bs->event) {
-               b_name = bs->event->name;
-               b_desc = bs->event->desc;
-               b_topic = bs->event->topic ?: "";
-               b_pmu_name = bs->event->pmu_name;
-       }
        /* Put extra events last. */
-       if (!!a_desc != !!b_desc)
-               return !!a_desc - !!b_desc;
+       if (!!as->desc != !!bs->desc) /* An entry with a desc sorts after one without. */
+               return !!as->desc - !!bs->desc;
 
        /* Order by topics. */
-       ret = strcmp(a_topic, b_topic);
+       ret = strcmp(as->topic ?: "", bs->topic ?: ""); /* A NULL topic compares as "". */
        if (ret)
                return ret;
 
        /* Order CPU core events to be first */
-       if (as->is_cpu != bs->is_cpu)
-               return as->is_cpu ? -1 : 1;
+       a_iscpu = as->pmu ? as->pmu->is_core : true; /* A missing pmu pointer is treated as a core PMU. */
+       b_iscpu = bs->pmu ? bs->pmu->is_core : true;
+       if (a_iscpu != b_iscpu)
+               return a_iscpu ? -1 : 1;
 
        /* Order by PMU name. */
        if (as->pmu != bs->pmu) {
-               a_pmu_name = a_pmu_name ?: (as->pmu->name ?: "");
-               b_pmu_name = b_pmu_name ?: (bs->pmu->name ?: "");
-               ret = strcmp(a_pmu_name, b_pmu_name);
+               ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: ""); /* A NULL pmu_name compares as "". */
                if (ret)
                        return ret;
        }
 
        /* Order by event name. */
-       return strcmp(a_name, b_name);
+       return strcmp(as->name, bs->name); /* NOTE(review): name is not NULL-guarded here, unlike topic and pmu_name - confirm it is always set. */
 }
 
-static bool pmu_alias_is_duplicate(struct sevent *alias_a,
-                                  struct sevent *alias_b)
+static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b) /* True when both entries share event name and pmu_name. */
 {
-       const char *a_pmu_name = NULL, *b_pmu_name = NULL;
-       const char *a_name = "//", *b_name = "//";
-
-
-       if (alias_a->event) {
-               a_name = alias_a->event->name;
-               a_pmu_name = alias_a->event->pmu_name;
-       }
-       if (alias_b->event) {
-               b_name = alias_b->event->name;
-               b_pmu_name = alias_b->event->pmu_name;
-       }
-
        /* Different names -> never duplicates */
-       if (strcmp(a_name, b_name))
+       if (strcmp(a->name ?: "//", b->name ?: "//")) /* A NULL name compares as "//". */
                return false;
 
        /* Don't remove duplicates for different PMUs */
-       a_pmu_name = a_pmu_name ?: (alias_a->pmu->name ?: "");
-       b_pmu_name = b_pmu_name ?: (alias_b->pmu->name ?: "");
-       return strcmp(a_pmu_name, b_pmu_name) == 0;
+       return strcmp(a->pmu_name, b->pmu_name) == 0; /* NOTE(review): pmu_name is unguarded here, while cmp_sevent() treats it as possibly NULL - confirm. */
 }
 
-static int sub_non_neg(int a, int b)
-{
-       if (b > a)
-               return 0;
-       return a - b;
-}
+struct events_callback_state { /* Cursor over the sevent array filled by perf_pmus__print_pmu_events__callback(). */
+       struct sevent *aliases; /* Array being filled. */
+       size_t aliases_len; /* Number of slots in aliases. */
+       size_t index; /* Next free slot; advanced once per stored event. */
+};
 
-static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
-                         const struct perf_pmu_alias *alias)
+static int perf_pmus__print_pmu_events__callback(void *vstate, /* Copies *info into the next free sevent slot; returns 0, or 1 when the array is full. */
+                                               struct pmu_event_info *info)
 {
-       struct parse_events_term *term;
-       int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name);
-
-       list_for_each_entry(term, &alias->terms, list) {
-               if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
-                       used += snprintf(buf + used, sub_non_neg(len, used),
-                                       ",%s=%s", term->config,
-                                       term->val.str);
-       }
+       struct events_callback_state *state = vstate;
+       struct sevent *s;
 
-       if (sub_non_neg(len, used) > 0) {
-               buf[used] = '/';
-               used++;
+       if (state->index >= state->aliases_len) { /* More events delivered than slots were allocated. */
+               pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name);
+               return 1; /* Non-zero return; presumably stops the iteration - confirm in perf_pmu__for_each_event(). */
        }
-       if (sub_non_neg(len, used) > 0) {
-               buf[used] = '\0';
-               used++;
-       } else
-               buf[len - 1] = '\0';
-
-       return buf;
+       s = &state->aliases[state->index];
+       s->pmu = info->pmu;
+#define COPY_STR(str) s->str = info->str ? strdup(info->str) : NULL /* NOTE(review): strdup() failure is not detected; the field just ends up NULL. */
+       COPY_STR(name);
+       COPY_STR(alias);
+       COPY_STR(scale_unit);
+       COPY_STR(desc);
+       COPY_STR(long_desc);
+       COPY_STR(encoding_desc);
+       COPY_STR(topic);
+       COPY_STR(pmu_name);
+#undef COPY_STR
+       s->deprecated = info->deprecated;
+       state->index++;
+       return 0;
 }
 
 void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
 {
        struct perf_pmu *pmu;
-       struct perf_pmu_alias *event;
-       char buf[1024];
        int printed = 0;
-       int len, j;
+       int len;
        struct sevent *aliases;
+       struct events_callback_state state;
+       bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state);
+       struct perf_pmu *(*scan_fn)(struct perf_pmu *);
+
+       if (skip_duplicate_pmus)
+               scan_fn = perf_pmus__scan_skip_duplicates;
+       else
+               scan_fn = perf_pmus__scan;
 
        pmu = NULL;
        len = 0;
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               list_for_each_entry(event, &pmu->aliases, list)
-                       len++;
-               if (pmu->selectable)
-                       len++;
-       }
+       while ((pmu = scan_fn(pmu)) != NULL)
+               len += perf_pmu__num_events(pmu);
+
        aliases = zalloc(sizeof(struct sevent) * len);
        if (!aliases) {
                pr_err("FATAL: not enough memory to print PMU events\n");
                return;
        }
        pmu = NULL;
-       j = 0;
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               bool is_cpu = pmu->is_core;
-
-               list_for_each_entry(event, &pmu->aliases, list) {
-                       aliases[j].event = event;
-                       aliases[j].pmu = pmu;
-                       aliases[j].is_cpu = is_cpu;
-                       j++;
-               }
-               if (pmu->selectable) {
-                       aliases[j].event = NULL;
-                       aliases[j].pmu = pmu;
-                       aliases[j].is_cpu = is_cpu;
-                       j++;
-               }
+       state = (struct events_callback_state) {
+               .aliases = aliases,
+               .aliases_len = len,
+               .index = 0,
+       };
+       while ((pmu = scan_fn(pmu)) != NULL) {
+               perf_pmu__for_each_event(pmu, skip_duplicate_pmus, &state,
+                                        perf_pmus__print_pmu_events__callback);
        }
-       len = j;
        qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
-       for (j = 0; j < len; j++) {
-               const char *name, *alias = NULL, *scale_unit = NULL,
-                       *desc = NULL, *long_desc = NULL,
-                       *encoding_desc = NULL, *topic = NULL,
-                       *pmu_name = NULL;
-               bool deprecated = false;
-               size_t buf_used;
-
+       for (int j = 0; j < len; j++) {
                /* Skip duplicates */
                if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
                        continue;
 
-               if (!aliases[j].event) {
-                       /* A selectable event. */
-                       pmu_name = aliases[j].pmu->name;
-                       buf_used = snprintf(buf, sizeof(buf), "%s//", pmu_name) + 1;
-                       name = buf;
-               } else {
-                       if (aliases[j].event->desc) {
-                               name = aliases[j].event->name;
-                               buf_used = 0;
-                       } else {
-                               name = format_alias(buf, sizeof(buf), aliases[j].pmu,
-                                                   aliases[j].event);
-                               if (aliases[j].is_cpu) {
-                                       alias = name;
-                                       name = aliases[j].event->name;
-                               }
-                               buf_used = strlen(buf) + 1;
-                       }
-                       pmu_name = aliases[j].event->pmu_name ?: (aliases[j].pmu->name ?: "");
-                       if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) {
-                               scale_unit = buf + buf_used;
-                               buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
-                                               "%G%s", aliases[j].event->scale,
-                                               aliases[j].event->unit) + 1;
-                       }
-                       desc = aliases[j].event->desc;
-                       long_desc = aliases[j].event->long_desc;
-                       topic = aliases[j].event->topic;
-                       encoding_desc = buf + buf_used;
-                       buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
-                                       "%s/%s/", pmu_name, aliases[j].event->str) + 1;
-                       deprecated = aliases[j].event->deprecated;
-               }
                print_cb->print_event(print_state,
-                               pmu_name,
-                               topic,
-                               name,
-                               alias,
-                               scale_unit,
-                               deprecated,
+                               aliases[j].pmu_name,
+                               aliases[j].topic,
+                               aliases[j].name,
+                               aliases[j].alias,
+                               aliases[j].scale_unit,
+                               aliases[j].deprecated,
                                "Kernel PMU event",
-                               desc,
-                               long_desc,
-                               encoding_desc);
+                               aliases[j].desc,
+                               aliases[j].long_desc,
+                               aliases[j].encoding_desc);
+               zfree(&aliases[j].name);
+               zfree(&aliases[j].alias);
+               zfree(&aliases[j].scale_unit);
+               zfree(&aliases[j].desc);
+               zfree(&aliases[j].long_desc);
+               zfree(&aliases[j].encoding_desc);
+               zfree(&aliases[j].topic);
+               zfree(&aliases[j].pmu_name);
        }
        if (printed && pager_in_use())
                printf("\n");
index a214644..4c67153 100644 (file)
@@ -5,6 +5,8 @@
 struct perf_pmu;
 struct print_callbacks;
 
+int pmu_name_len_no_suffix(const char *str, unsigned long *num);
+
 void perf_pmus__destroy(void);
 
 struct perf_pmu *perf_pmus__find(const char *name);
index d7fab41..bf4290b 100644 (file)
@@ -26,6 +26,7 @@ struct print_callbacks {
                        const char *expr,
                        const char *threshold,
                        const char *unit);
+       bool (*skip_duplicate_pmus)(void *print_state);
 };
 
 /** Print all events, the default when no options are specified. */
index 16822a8..1a5b7fa 100644 (file)
@@ -53,6 +53,8 @@
 bool probe_event_dry_run;      /* Dry run flag */
 struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM };
 
+static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
+
 #define semantic_error(msg ...) pr_err("Semantic error :" msg)
 
 int e_snprintf(char *str, size_t size, const char *format, ...)
@@ -961,8 +963,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
        debuginfo__delete(dinfo);
 
        if (ntevs == 0) {       /* No error but failed to find probe point. */
-               pr_warning("Probe point '%s' not found.\n",
-                          synthesize_perf_probe_point(&pev->point));
+               char *probe_point = synthesize_perf_probe_point(&pev->point);
+               pr_warning("Probe point '%s' not found.\n", probe_point);
+               free(probe_point);
                return -ENODEV;
        } else if (ntevs < 0) {
                /* Error path : ntevs < 0 */
@@ -2009,7 +2012,7 @@ out:
 }
 
 /* Compose only probe point (not argument) */
-char *synthesize_perf_probe_point(struct perf_probe_point *pp)
+static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
 {
        struct strbuf buf;
        char *tmp, *ret = NULL;
@@ -2062,14 +2065,18 @@ char *synthesize_perf_probe_command(struct perf_probe_event *pev)
                        goto out;
 
        tmp = synthesize_perf_probe_point(&pev->point);
-       if (!tmp || strbuf_addstr(&buf, tmp) < 0)
+       if (!tmp || strbuf_addstr(&buf, tmp) < 0) {
+               free(tmp);
                goto out;
+       }
        free(tmp);
 
        for (i = 0; i < pev->nargs; i++) {
                tmp = synthesize_perf_probe_arg(pev->args + i);
-               if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0)
+               if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0) {
+                       free(tmp);
                        goto out;
+               }
                free(tmp);
        }
 
@@ -2800,13 +2807,18 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev)
        if (!tev->uprobes || tev->nargs == 0 || !buf)
                goto out;
 
-       for (i = 0; i < tev->nargs; i++)
-               if (strglobmatch(tev->args[i].value, "[$@+-]*")) {
-                       pr_warning("Please upgrade your kernel to at least "
-                                  "3.14 to have access to feature %s\n",
+       for (i = 0; i < tev->nargs; i++) {
+               if (strchr(tev->args[i].value, '@')) {
+                       pr_warning("%s accesses a variable by symbol name, but that is not supported for user application probe.\n",
                                   tev->args[i].value);
                        break;
                }
+               if (strglobmatch(tev->args[i].value, "[$+-]*")) {
+                       pr_warning("Please upgrade your kernel to at least 3.14 to have access to feature %s\n",
+                                  tev->args[i].value);
+                       break;
+               }
+       }
 out:
        free(buf);
 }
index 8ad5b15..7e3b6c3 100644 (file)
@@ -137,7 +137,6 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
 char *synthesize_perf_probe_command(struct perf_probe_event *pev);
 char *synthesize_probe_trace_command(struct probe_trace_event *tev);
 char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
-char *synthesize_perf_probe_point(struct perf_probe_point *pp);
 
 int perf_probe_event__copy(struct perf_probe_event *dst,
                           struct perf_probe_event *src);
index d4c9b4c..26e1c8d 100644 (file)
@@ -40,3 +40,12 @@ util/rwsem.c
 util/hashmap.c
 util/perf_regs.c
 util/fncache.c
+util/perf-regs-arch/perf_regs_aarch64.c
+util/perf-regs-arch/perf_regs_arm.c
+util/perf-regs-arch/perf_regs_csky.c
+util/perf-regs-arch/perf_regs_loongarch.c
+util/perf-regs-arch/perf_regs_mips.c
+util/perf-regs-arch/perf_regs_powerpc.c
+util/perf-regs-arch/perf_regs_riscv.c
+util/perf-regs-arch/perf_regs_s390.c
+util/perf-regs-arch/perf_regs_x86.c
index 4eed8ec..c29f5f0 100644 (file)
@@ -113,6 +113,11 @@ bool evsel__is_aux_event(const struct evsel *evsel __maybe_unused)
        return false;
 }
 
+bool perf_pmus__supports_extended_type(void)
+{
+       return false; /* This definition always reports false. */
+}
+
 /*
  * Add this one here not to drag util/metricgroup.c
  */
index c10b891..115b16e 100644 (file)
@@ -27,7 +27,7 @@
 #include "color.h"
 #include "sample-raw.h"
 #include "s390-cpumcf-kernel.h"
-#include "pmu-events/pmu-events.h"
+#include "util/pmu.h"
 #include "util/sample.h"
 
 static size_t ctrset_size(struct cf_ctrset_entry *set)
@@ -132,56 +132,58 @@ static int get_counterset_start(int setnr)
 
 struct get_counter_name_data {
        int wanted;
-       const char *result;
+       char *result; /* strdup()ed event name set by get_counter_name_callback(); stays NULL when nothing matches. */
 };
 
-static int get_counter_name_callback(const struct pmu_event *evp,
-                                    const struct pmu_events_table *table __maybe_unused,
-                                    void *vdata)
+static int get_counter_name_callback(void *vdata, struct pmu_event_info *info) /* Records the name of the event whose "event=" hex value equals data->wanted. */
 {
        struct get_counter_name_data *data = vdata;
        int rc, event_nr;
+       const char *event_str;
 
-       if (evp->name == NULL || evp->event == NULL)
+       if (info->str == NULL)
                return 0;
-       rc = sscanf(evp->event, "event=%x", &event_nr);
+
+       event_str = strstr(info->str, "event="); /* "event=" may appear anywhere in the string, not only at its start. */
+       if (!event_str)
+               return 0;
+
+       rc = sscanf(event_str, "event=%x", &event_nr);
        if (rc == 1 && event_nr == data->wanted) {
-               data->result = evp->name;
+               data->result = strdup(info->name); /* NOTE(review): info->name is not NULL-checked before strdup() - confirm it cannot be NULL. */
                return 1; /* Terminate the search. */
        }
        return 0;
 }
 
-/* Scan the PMU table and extract the logical name of a counter from the
- * PMU events table. Input is the counter set and counter number with in the
- * set. Construct the event number and use this as key. If they match return
- * the name of this counter.
+/* Scan the PMU and extract the logical name of a counter from the event. Input
+ * is the counter set and counter number within the set. Construct the event
+ * number and use this as key. If they match return the name of this counter.
  * If no match is found a NULL pointer is returned.
  */
-static const char *get_counter_name(int set, int nr, const struct pmu_events_table *table)
+static char *get_counter_name(int set, int nr, struct perf_pmu *pmu) /* The result is strdup()ed by the callback; s390_cpumcfdg_dump() free()s it. */
 {
        struct get_counter_name_data data = {
                .wanted = get_counterset_start(set) + nr,
                .result = NULL,
        };
 
-       if (!table)
+       if (!pmu)
                return NULL;
 
-       pmu_events_table_for_each_event(table, get_counter_name_callback, &data);
+       perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ true,
+                                &data, get_counter_name_callback);
        return data.result;
 }
 
-static void s390_cpumcfdg_dump(struct perf_sample *sample)
+static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample)
 {
        size_t i, len = sample->raw_size, offset = 0;
        unsigned char *buf = sample->raw_data;
        const char *color = PERF_COLOR_BLUE;
        struct cf_ctrset_entry *cep, ce;
-       const struct pmu_events_table *table;
        u64 *p;
 
-       table = pmu_events_table__find();
        while (offset < len) {
                cep = (struct cf_ctrset_entry *)(buf + offset);
 
@@ -199,11 +201,12 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample)
                color_fprintf(stdout, color, "    [%#08zx] Counterset:%d"
                              " Counters:%d\n", offset, ce.set, ce.ctr);
                for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) {
-                       const char *ev_name = get_counter_name(ce.set, i, table);
+                       char *ev_name = get_counter_name(ce.set, i, pmu);
 
                        color_fprintf(stdout, color,
                                      "\tCounter:%03d %s Value:%#018lx\n", i,
                                      ev_name ?: "<unknown>", be64_to_cpu(*p));
+                       free(ev_name);
                }
                offset += ctrset_size(&ce);
        }
@@ -216,14 +219,14 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample)
  */
 void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
 {
-       struct evsel *ev_bc000;
+       struct evsel *evsel;
 
        if (event->header.type != PERF_RECORD_SAMPLE)
                return;
 
-       ev_bc000 = evlist__event2evsel(evlist, event);
-       if (ev_bc000 == NULL ||
-           ev_bc000->core.attr.config != PERF_EVENT_CPUM_CF_DIAG)
+       evsel = evlist__event2evsel(evlist, event);
+       if (evsel == NULL ||
+           evsel->core.attr.config != PERF_EVENT_CPUM_CF_DIAG)
                return;
 
        /* Display raw data on screen */
@@ -231,5 +234,5 @@ void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, str
                pr_err("Invalid counter set data encountered\n");
                return;
        }
-       s390_cpumcfdg_dump(sample);
+       s390_cpumcfdg_dump(evsel->pmu, sample);
 }
index c220fec..586b94e 100644 (file)
@@ -5,4 +5,5 @@ perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
 
 CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
 
-CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum
+# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance)
+CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum -Wno-declaration-after-statement
index 00d18c7..1e9aa8e 100644 (file)
@@ -833,8 +833,8 @@ static void perf_event__hdr_attr_swap(union perf_event *event,
        perf_event__attr_swap(&event->attr.attr);
 
        size = event->header.size;
-       size -= (void *)&event->attr.id - (void *)event;
-       mem_bswap_64(event->attr.id, size);
+       size -= perf_record_header_attr_id(event) - (void *)event;
+       mem_bswap_64(perf_record_header_attr_id(event), size);
 }
 
 static void perf_event__event_update_swap(union perf_event *event,
index 869738f..79d5e29 100644 (file)
@@ -66,6 +66,9 @@ if cc_is_clang:
 else:
     cflags += ['-Wno-cast-function-type' ]
 
+# The python headers have mixed code with declarations (decls after asserts, for instance)
+cflags += [ "-Wno-declaration-after-statement" ]
+
 src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
index d45d5dc..afe6db8 100644 (file)
@@ -578,7 +578,7 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused
        if (!valid_only_metric(unit))
                return;
        unit = fixunit(tbuf, os->evsel, unit);
-       snprintf(buf, sizeof buf, fmt, val);
+       snprintf(buf, sizeof(buf), fmt ?: "", val);
        ends = vals = skip_spaces(buf);
        while (isdigit(*ends) || *ends == '.')
                ends++;
@@ -600,7 +600,7 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse
        if (!valid_only_metric(unit))
                return;
        unit = fixunit(tbuf, os->evsel, unit);
-       snprintf(buf, sizeof(buf), fmt, val);
+       snprintf(buf, sizeof(buf), fmt ?: "", val);
        ends = vals = skip_spaces(buf);
        while (isdigit(*ends) || *ends == '.')
                ends++;
index 967e583..ec35060 100644 (file)
@@ -729,7 +729,7 @@ size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
 
 size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
 {
-       struct perf_stat_config sc;
+       struct perf_stat_config sc = {};
        size_t ret;
 
        perf_event__read_stat_config(&sc, &event->stat_config);
index 5c62d31..0e4dc31 100644 (file)
@@ -331,7 +331,7 @@ static char *cpu_model(void)
        file = fopen("/proc/cpuinfo", "r");
        if (file) {
                while (fgets(buf, 255, file)) {
-                       if (strstr(buf, "model name")) {
+                       if (strcasestr(buf, "model name")) {
                                strlcpy(cpu_m, &buf[13], 255);
                                break;
                        }
index 8bd466d..95e99c3 100644 (file)
@@ -1440,6 +1440,8 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
                curr_dso->kernel = dso->kernel;
                curr_dso->long_name = dso->long_name;
                curr_dso->long_name_len = dso->long_name_len;
+               curr_dso->binary_type = dso->binary_type;
+               curr_dso->adjust_symbols = dso->adjust_symbols;
                curr_map = map__new2(start, curr_dso);
                dso__put(curr_dso);
                if (curr_map == NULL)
index f849f9e..3f36675 100644 (file)
@@ -2204,15 +2204,20 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
        if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type))
                return -1;
 
+       /*
+        * dso__load_sym() may copy 'dso' which will result in the copies having
+        * an incorrect long name unless we set it here first.
+        */
+       dso__set_long_name(dso, vmlinux, vmlinux_allocated);
+       if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
+               dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+       else
+               dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
+
        err = dso__load_sym(dso, map, &ss, &ss, 0);
        symsrc__destroy(&ss);
 
        if (err > 0) {
-               if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
-                       dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
-               else
-                       dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
-               dso__set_long_name(dso, vmlinux, vmlinux_allocated);
                dso__set_loaded(dso);
                pr_debug("Using %s for symbols\n", symfs_vmlinux);
        }
index 45714a2..a0579c7 100644 (file)
@@ -2145,7 +2145,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *
                return -ENOMEM;
 
        ev->attr.attr = *attr;
-       memcpy(ev->attr.id, id, ids * sizeof(u64));
+       memcpy(perf_record_header_attr_id(ev), id, ids * sizeof(u64));
 
        ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
        ev->attr.header.size = (u16)size;
index 0b16640..fe5e699 100644 (file)
@@ -80,6 +80,15 @@ err_thread:
        return NULL;
 }
 
+static void (*thread__priv_destructor)(void *priv);
+
+void thread__set_priv_destructor(void (*destructor)(void *priv))
+{
+       assert(thread__priv_destructor == NULL);
+
+       thread__priv_destructor = destructor;
+}
+
 void thread__delete(struct thread *thread)
 {
        struct namespaces *namespaces, *tmp_namespaces;
@@ -112,6 +121,10 @@ void thread__delete(struct thread *thread)
        exit_rwsem(thread__namespaces_lock(thread));
        exit_rwsem(thread__comm_lock(thread));
        thread__free_stitch_list(thread);
+
+       if (thread__priv_destructor)
+               thread__priv_destructor(thread__priv(thread));
+
        RC_CHK_FREE(thread);
 }
 
index 9068a21..e79225a 100644 (file)
@@ -71,6 +71,8 @@ struct thread *thread__new(pid_t pid, pid_t tid);
 int thread__init_maps(struct thread *thread, struct machine *machine);
 void thread__delete(struct thread *thread);
 
+void thread__set_priv_destructor(void (*destructor)(void *priv));
+
 struct thread *thread__get(struct thread *thread);
 void thread__put(struct thread *thread);
 
index 2a96df4..8554db3 100644 (file)
@@ -17,6 +17,7 @@
 #include "event.h"
 #include "perf_regs.h"
 #include "callchain.h"
+#include "util/env.h"
 
 static char *debuginfo_path;
 
@@ -170,12 +171,14 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
                        void *arg)
 {
        struct unwind_info *ui = arg;
+       const char *arch = perf_env__arch(ui->machine->env);
        struct stack_dump *stack = &ui->sample->user_stack;
        u64 start, end;
        int offset;
        int ret;
 
-       ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
+       ret = perf_reg_value(&start, &ui->sample->user_regs,
+                            perf_arch_reg_sp(arch));
        if (ret)
                return false;
 
@@ -253,6 +256,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
                .max_stack      = max_stack,
                .best_effort    = best_effort
        };
+       const char *arch = perf_env__arch(ui_buf.machine->env);
        Dwarf_Word ip;
        int err = -EINVAL, i;
 
@@ -269,7 +273,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
        if (!ui->dwfl)
                goto out;
 
-       err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
+       err = perf_reg_value(&ip, &data->user_regs, perf_arch_reg_ip(arch));
        if (err)
                goto out;
 
index ebfde53..c064188 100644 (file)
@@ -553,6 +553,7 @@ static int access_mem(unw_addr_space_t __maybe_unused as,
                      int __write, void *arg)
 {
        struct unwind_info *ui = arg;
+       const char *arch = perf_env__arch(ui->machine->env);
        struct stack_dump *stack = &ui->sample->user_stack;
        u64 start, end;
        int offset;
@@ -565,7 +566,7 @@ static int access_mem(unw_addr_space_t __maybe_unused as,
        }
 
        ret = perf_reg_value(&start, &ui->sample->user_regs,
-                            LIBUNWIND__ARCH_REG_SP);
+                            perf_arch_reg_sp(arch));
        if (ret)
                return ret;
 
@@ -714,6 +715,7 @@ static void _unwind__finish_access(struct maps *maps)
 static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
                       void *arg, int max_stack)
 {
+       const char *arch = perf_env__arch(ui->machine->env);
        u64 val;
        unw_word_t ips[max_stack];
        unw_addr_space_t addr_space;
@@ -721,7 +723,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
        int ret, i = 0;
 
        ret = perf_reg_value(&val, &ui->sample->user_regs,
-                            LIBUNWIND__ARCH_REG_IP);
+                            perf_arch_reg_ip(arch));
        if (ret)
                return ret;
 
index b2a03fa..9f7164c 100644 (file)
@@ -42,14 +42,6 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 #define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
 #endif
 
-#ifndef LIBUNWIND__ARCH_REG_SP
-#define LIBUNWIND__ARCH_REG_SP PERF_REG_SP
-#endif
-
-#ifndef LIBUNWIND__ARCH_REG_IP
-#define LIBUNWIND__ARCH_REG_IP PERF_REG_IP
-#endif
-
 int LIBUNWIND__ARCH_REG_ID(int regnum);
 int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized);
 void unwind__flush_access(struct maps *maps);
index dc53180..b53753d 100644 (file)
@@ -57,7 +57,7 @@ LIB_MIN=                      1
 
 PACKAGE =                      cpupower
 PACKAGE_BUGREPORT =            linux-pm@vger.kernel.org
-LANGUAGES =                    de fr it cs pt
+LANGUAGES =                    de fr it cs pt ka
 
 
 # Directory definitions. These are default and most probably
index 172e472..d69d034 100644 (file)
@@ -177,3 +177,23 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
 endef
 _ge_attempt = $(or $(get-executable),$(call _gea_err,$(2)))
 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
+
+# version-ge3
+#
+# Usage $(call version-ge3,2.6.4,$(FLEX_VERSION))
+#
+# Checks if one 3 component version is greater or equal to another. First use
+# was checking the flex version to see if we can use compiler warnings as
+# errors for a case where flex generates code that C compilers complain about.
+
+version-ge3 = $(shell echo "$(1).$(2)" | awk -F'.' '{ printf("%d\n", (10000000 * $$1 + 10000 * $$2 + $$3) >= (10000000 * $$4 + 10000 * $$5 + $$6)) }')
+
+# version-lt3
+#
+# Usage $(call version-lt3,2.6.2,$(FLEX_VERSION))
+#
+# Checks if one 3 component version is less than another. First use was
+# checking the flex version to see if we can use compiler warnings as errors
+# for a case where flex generates code that C compilers complain about.
+
+version-lt3 = $(shell echo "$(1).$(2)" | awk -F'.' '{ printf("%d\n", (10000000 * $$1 + 10000 * $$2 + $$3) < (10000000 * $$4 + 10000 * $$5 + $$6)) }')
index e00520c..cffaf22 100644 (file)
@@ -159,7 +159,7 @@ void multiorder_tagged_iteration(struct xarray *xa)
        item_kill_tree(xa);
 }
 
-bool stop_iteration = false;
+bool stop_iteration;
 
 static void *creator_func(void *ptr)
 {
@@ -201,6 +201,7 @@ static void multiorder_iteration_race(struct xarray *xa)
        pthread_t worker_thread[num_threads];
        int i;
 
+       stop_iteration = false;
        pthread_create(&worker_thread[0], NULL, &creator_func, xa);
        for (i = 1; i < num_threads; i++)
                pthread_create(&worker_thread[i], NULL, &iterator_func, xa);
@@ -211,6 +212,61 @@ static void multiorder_iteration_race(struct xarray *xa)
        item_kill_tree(xa);
 }
 
+static void *load_creator(void *ptr)
+{
+       /* 'order' is set up to ensure we have sibling entries */
+       unsigned int order;
+       struct radix_tree_root *tree = ptr;
+       int i;
+
+       rcu_register_thread();
+       item_insert_order(tree, 3 << RADIX_TREE_MAP_SHIFT, 0);
+       item_insert_order(tree, 2 << RADIX_TREE_MAP_SHIFT, 0);
+       for (i = 0; i < 10000; i++) {
+               for (order = 1; order < RADIX_TREE_MAP_SHIFT; order++) {
+                       unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) -
+                                               (1 << order);
+                       item_insert_order(tree, index, order);
+                       item_delete_rcu(tree, index);
+               }
+       }
+       rcu_unregister_thread();
+
+       stop_iteration = true;
+       return NULL;
+}
+
+static void *load_worker(void *ptr)
+{
+       unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - 1;
+
+       rcu_register_thread();
+       while (!stop_iteration) {
+               struct item *item = xa_load(ptr, index);
+               assert(!xa_is_internal(item));
+       }
+       rcu_unregister_thread();
+
+       return NULL;
+}
+
+static void load_race(struct xarray *xa)
+{
+       const int num_threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+       pthread_t worker_thread[num_threads];
+       int i;
+
+       stop_iteration = false;
+       pthread_create(&worker_thread[0], NULL, &load_creator, xa);
+       for (i = 1; i < num_threads; i++)
+               pthread_create(&worker_thread[i], NULL, &load_worker, xa);
+
+       for (i = 0; i < num_threads; i++)
+               pthread_join(worker_thread[i], NULL);
+
+       item_kill_tree(xa);
+}
+
 static DEFINE_XARRAY(array);
 
 void multiorder_checks(void)
@@ -218,12 +274,20 @@ void multiorder_checks(void)
        multiorder_iteration(&array);
        multiorder_tagged_iteration(&array);
        multiorder_iteration_race(&array);
+       load_race(&array);
 
        radix_tree_cpu_dead(0);
 }
 
-int __weak main(void)
+int __weak main(int argc, char **argv)
 {
+       int opt;
+
+       while ((opt = getopt(argc, argv, "ls:v")) != -1) {
+               if (opt == 'v')
+                       test_verbose++;
+       }
+
        rcu_register_thread();
        radix_tree_init();
        multiorder_checks();
index edef49f..caede9b 100644 (file)
@@ -50,14 +50,17 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
        test_cgroup_storage \
        test_tcpnotify_user test_sysctl \
        test_progs-no_alu32
+TEST_INST_SUBDIRS := no_alu32
 
 # Also test bpf-gcc, if present
 ifneq ($(BPF_GCC),)
 TEST_GEN_PROGS += test_progs-bpf_gcc
+TEST_INST_SUBDIRS += bpf_gcc
 endif
 
 ifneq ($(CLANG_CPUV4),)
 TEST_GEN_PROGS += test_progs-cpuv4
+TEST_INST_SUBDIRS += cpuv4
 endif
 
 TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
@@ -714,3 +717,12 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)     \
 
 # Delete partially updated (corrupted) files on error
 .DELETE_ON_ERROR:
+
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+       $(DEFAULT_INSTALL_RULE)
+       @for DIR in $(TEST_INST_SUBDIRS); do              \
+               mkdir -p $(INSTALL_PATH)/$$DIR;   \
+               rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\
+       done
+endef
index 31f1e81..ee0458a 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/unistd.h>
 #include <linux/mount.h>
 #include <sys/syscall.h>
+#include "bpf/libbpf_internal.h"
 
 static inline int sys_fsopen(const char *fsname, unsigned flags)
 {
@@ -155,7 +156,7 @@ static void validate_pin(int map_fd, const char *map_name, int src_value,
        ASSERT_OK(err, "obj_pin");
 
        /* cleanup */
-       if (pin_opts.path_fd >= 0)
+       if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0)
                close(pin_opts.path_fd);
        if (old_cwd[0])
                ASSERT_OK(chdir(old_cwd), "restore_cwd");
@@ -220,7 +221,7 @@ static void validate_get(int map_fd, const char *map_name, int src_value,
                goto cleanup;
 
        /* cleanup */
-       if (get_opts.path_fd >= 0)
+       if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0)
                close(get_opts.path_fd);
        if (old_cwd[0])
                ASSERT_OK(chdir(old_cwd), "restore_cwd");
index 911345c..ccc7685 100644 (file)
 #include "test_d_path_check_rdonly_mem.skel.h"
 #include "test_d_path_check_types.skel.h"
 
+/* sys_close_range has not been around for a long time, so let's
+ * make sure we can call it on systems with older glibc
+ */
+#ifndef __NR_close_range
+#ifdef __alpha__
+#define __NR_close_range 546
+#else
+#define __NR_close_range 436
+#endif
+#endif
+
 static int duration;
 
 static struct {
@@ -90,7 +101,11 @@ static int trigger_fstat_events(pid_t pid)
        fstat(indicatorfd, &fileStat);
 
 out_close:
-       /* triggers filp_close */
+       /* sys_close no longer triggers filp_close, but we can
+        * call sys_close_range instead which still does
+        */
+#define close(fd) syscall(__NR_close_range, fd, fd, 0)
+
        close(pipefd[0]);
        close(pipefd[1]);
        close(sockfd);
@@ -98,6 +113,8 @@ out_close:
        close(devfd);
        close(localfd);
        close(indicatorfd);
+
+#undef close
        return ret;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c
new file mode 100644 (file)
index 0000000..f35852d
--- /dev/null
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "sk_storage_omem_uncharge.skel.h"
+
+void test_sk_storage_omem_uncharge(void)
+{
+       struct sk_storage_omem_uncharge *skel;
+       int sk_fd = -1, map_fd, err, value;
+       socklen_t optlen;
+
+       skel = sk_storage_omem_uncharge__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+               return;
+       map_fd = bpf_map__fd(skel->maps.sk_storage);
+
+       /* A standalone socket not binding to addr:port,
+        * so netns is not needed.
+        */
+       sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
+       if (!ASSERT_GE(sk_fd, 0, "socket"))
+               goto done;
+
+       optlen = sizeof(skel->bss->cookie);
+       err = getsockopt(sk_fd, SOL_SOCKET, SO_COOKIE, &skel->bss->cookie, &optlen);
+       if (!ASSERT_OK(err, "getsockopt(SO_COOKIE)"))
+               goto done;
+
+       value = 0;
+       err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+       if (!ASSERT_OK(err, "bpf_map_update_elem(value=0)"))
+               goto done;
+
+       value = 0xdeadbeef;
+       err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+       if (!ASSERT_OK(err, "bpf_map_update_elem(value=0xdeadbeef)"))
+               goto done;
+
+       err = sk_storage_omem_uncharge__attach(skel);
+       if (!ASSERT_OK(err, "attach"))
+               goto done;
+
+       close(sk_fd);
+       sk_fd = -1;
+
+       ASSERT_EQ(skel->bss->cookie_found, 2, "cookie_found");
+       ASSERT_EQ(skel->bss->omem, 0, "omem");
+
+done:
+       sk_storage_omem_uncharge__destroy(skel);
+       if (sk_fd != -1)
+               close(sk_fd);
+}
index d126654..36d829a 100644 (file)
                __ret;                                                         \
        })
 
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+       struct timeval timeout = { .tv_sec = timeout_sec };
+       fd_set wfds;
+       int r, eval;
+       socklen_t esize = sizeof(eval);
+
+       FD_ZERO(&wfds);
+       FD_SET(fd, &wfds);
+
+       r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+       if (r == 0)
+               errno = ETIME;
+       if (r != 1)
+               return -1;
+
+       if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+               return -1;
+       if (eval != 0) {
+               errno = eval;
+               return -1;
+       }
+
+       return 0;
+}
+
 static inline int poll_read(int fd, unsigned int timeout_sec)
 {
        struct timeval timeout = { .tv_sec = timeout_sec };
index 5674a9d..8df8cbb 100644 (file)
@@ -1452,11 +1452,18 @@ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
        if (p < 0)
                goto close_cli;
 
+       if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
+               FAIL_ERRNO("poll_connect");
+               goto close_acc;
+       }
+
        *v0 = p;
        *v1 = c;
 
        return 0;
 
+close_acc:
+       close(p);
 close_cli:
        close(c);
 close_srv:
index cfed4df..0b793a1 100644 (file)
@@ -88,6 +88,7 @@
 #define sk_v6_rcv_saddr                __sk_common.skc_v6_rcv_saddr
 #define sk_flags               __sk_common.skc_flags
 #define sk_reuse               __sk_common.skc_reuse
+#define sk_cookie              __sk_common.skc_cookie
 
 #define s6_addr32              in6_u.u6_addr32
 
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
new file mode 100644 (file)
index 0000000..3e74579
--- /dev/null
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+void *local_storage_ptr = NULL;
+void *sk_ptr = NULL;
+int cookie_found = 0;
+__u64 cookie = 0;
+__u32 omem = 0;
+
+void *bpf_rdonly_cast(void *, __u32) __ksym;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+       __uint(map_flags, BPF_F_NO_PREALLOC);
+       __type(key, int);
+       __type(value, int);
+} sk_storage SEC(".maps");
+
+SEC("fexit/bpf_local_storage_destroy")
+int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
+{
+       struct sock *sk;
+
+       if (local_storage_ptr != local_storage)
+               return 0;
+
+       sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock));
+       if (sk->sk_cookie.counter != cookie)
+               return 0;
+
+       cookie_found++;
+       omem = sk->sk_omem_alloc.counter;
+       local_storage_ptr = NULL;
+
+       return 0;
+}
+
+SEC("fentry/inet6_sock_destruct")
+int BPF_PROG(inet6_sock_destruct, struct sock *sk)
+{
+       int *value;
+
+       if (!cookie || sk->sk_cookie.counter != cookie)
+               return 0;
+
+       value = bpf_sk_storage_get(&sk_storage, sk, 0, 0);
+       if (value && *value == 0xdeadbeef) {
+               cookie_found++;
+               sk_ptr = sk;
+               local_storage_ptr = sk->sk_bpf_storage;
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
index c692cc8..a3bb36f 100644 (file)
@@ -23,6 +23,7 @@ LIBKVM += lib/guest_modes.c
 LIBKVM += lib/io.c
 LIBKVM += lib/kvm_util.c
 LIBKVM += lib/memstress.c
+LIBKVM += lib/guest_sprintf.c
 LIBKVM += lib/rbtree.c
 LIBKVM += lib/sparsebit.c
 LIBKVM += lib/test_util.c
@@ -122,6 +123,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += guest_print_test
 TEST_GEN_PROGS_x86_64 += hardware_disable_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += kvm_page_table_test
@@ -140,7 +142,6 @@ TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
 TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
 TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
 TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
 TEST_GEN_PROGS_aarch64 += aarch64/psci_test
@@ -152,6 +153,8 @@ TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
+TEST_GEN_PROGS_aarch64 += guest_print_test
+TEST_GEN_PROGS_aarch64 += get-reg-list
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += kvm_page_table_test
 TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
@@ -166,8 +169,10 @@ TEST_GEN_PROGS_s390x += s390x/resets
 TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += s390x/tprot
 TEST_GEN_PROGS_s390x += s390x/cmma_test
+TEST_GEN_PROGS_s390x += s390x/debug_test
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
+TEST_GEN_PROGS_s390x += guest_print_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 TEST_GEN_PROGS_s390x += kvm_page_table_test
 TEST_GEN_PROGS_s390x += rseq_test
@@ -176,11 +181,15 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test
 
 TEST_GEN_PROGS_riscv += demand_paging_test
 TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += guest_print_test
+TEST_GEN_PROGS_riscv += get-reg-list
 TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
 TEST_GEN_PROGS_riscv += kvm_page_table_test
 TEST_GEN_PROGS_riscv += set_memory_region_test
 TEST_GEN_PROGS_riscv += kvm_binary_stats_test
 
+SPLIT_TESTS += get-reg-list
+
 TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
 TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
@@ -204,6 +213,7 @@ endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
        -Wno-gnu-variable-sized-type-not-at-end -MD\
        -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
+       -fno-builtin-strnlen \
        -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
        -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
        -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
@@ -228,11 +238,14 @@ LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
 LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
 LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
 LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+SPLIT_TESTS_TARGETS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TESTS_OBJS := $(patsubst %, $(ARCH_DIR)/%.o, $(SPLIT_TESTS))
 
 TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
 TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
 TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
 TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TESTS_OBJS))
 -include $(TEST_DEP_FILES)
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
@@ -240,7 +253,10 @@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
 $(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
        $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) cscope.*
+$(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS)
+       $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) $(SPLIT_TESTS_OBJS) cscope.*
 
 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
 $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
index 4951ac5..b905808 100644 (file)
@@ -98,7 +98,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
                uint64_t val;
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
 
                /*
                 * Expect the ioctl to succeed with no effect on the register
@@ -107,7 +107,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
                vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
        }
 }
 
@@ -127,14 +127,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
                uint64_t val;
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
 
                r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
                TEST_ASSERT(r < 0 && errno == EINVAL,
                            "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
 
                vcpu_get_reg(vcpu, reg_id, &val);
-               ASSERT_EQ(val, 0);
+               TEST_ASSERT_EQ(val, 0);
        }
 }
 
index 8ef3709..274b846 100644 (file)
@@ -19,7 +19,6 @@
  *
  * Copyright (c) 2021, Google LLC.
  */
-
 #define _GNU_SOURCE
 
 #include <stdlib.h>
@@ -155,11 +154,13 @@ static void guest_validate_irq(unsigned int intid,
        xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
 
        /* Make sure we are dealing with the correct timer IRQ */
-       GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq);
+       GUEST_ASSERT_EQ(intid, timer_irq);
 
        /* Basic 'timer condition met' check */
-       GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
-       GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
+       __GUEST_ASSERT(xcnt >= cval,
+                      "xcnt = 0x%llx, cval = 0x%llx, xcnt_diff_us = 0x%llx",
+                      xcnt, cval, xcnt_diff_us);
+       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%llx", xctl);
 
        WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
 }
@@ -192,8 +193,7 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
                        TIMER_TEST_ERR_MARGIN_US);
 
                irq_iter = READ_ONCE(shared_data->nr_iter);
-               GUEST_ASSERT_2(config_iter + 1 == irq_iter,
-                               config_iter + 1, irq_iter);
+               GUEST_ASSERT_EQ(config_iter + 1, irq_iter);
        }
 }
 
@@ -243,13 +243,9 @@ static void *test_vcpu_run(void *arg)
                break;
        case UCALL_ABORT:
                sync_global_from_guest(vm, *shared_data);
-               REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u",
-                                     GUEST_ASSERT_ARG(uc, 0),
-                                     GUEST_ASSERT_ARG(uc, 1),
-                                     GUEST_ASSERT_ARG(uc, 2),
-                                     vcpu_idx,
-                                     shared_data->guest_stage,
-                                     shared_data->nr_iter);
+               fprintf(stderr, "Guest assert failed,  vcpu %u; stage; %u; iter: %u\n",
+                       vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
+               REPORT_GUEST_ASSERT(uc);
                break;
        default:
                TEST_FAIL("Unexpected guest exit\n");
index 637be79..f5b6cb3 100644 (file)
@@ -365,7 +365,7 @@ static void guest_wp_handler(struct ex_regs *regs)
 
 static void guest_ss_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%u'", ss_idx);
        ss_addr[ss_idx++] = regs->pc;
        regs->pstate |= SPSR_SS;
 }
@@ -410,8 +410,8 @@ static void guest_code_ss(int test_cnt)
                /* Userspace disables Single Step when the end is nigh. */
                asm volatile("iter_ss_end:\n");
 
-               GUEST_ASSERT(bvr == w_bvr);
-               GUEST_ASSERT(wvr == w_wvr);
+               GUEST_ASSERT_EQ(bvr, w_bvr);
+               GUEST_ASSERT_EQ(wvr, w_wvr);
        }
        GUEST_DONE();
 }
@@ -450,7 +450,7 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp
        vcpu_run(vcpu);
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                goto done;
index 4f10055..709d7d7 100644 (file)
@@ -4,50 +4,17 @@
  *
  * Copyright (C) 2020, Red Hat, Inc.
  *
- * When attempting to migrate from a host with an older kernel to a host
- * with a newer kernel we allow the newer kernel on the destination to
- * list new registers with get-reg-list. We assume they'll be unused, at
- * least until the guest reboots, and so they're relatively harmless.
- * However, if the destination host with the newer kernel is missing
- * registers which the source host with the older kernel has, then that's
- * a regression in get-reg-list. This test checks for that regression by
- * checking the current list against a blessed list. We should never have
- * missing registers, but if new ones appear then they can probably be
- * added to the blessed list. A completely new blessed list can be created
- * by running the test with the --list command line argument.
- *
- * Note, the blessed list should be created from the oldest possible
- * kernel. We can't go older than v4.15, though, because that's the first
- * release to expose the ID system registers in KVM_GET_REG_LIST, see
- * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
- * from guests"). Also, one must use the --core-reg-fixup command line
- * option when running on an older kernel that doesn't include df205b5c6328
- * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
+ * While the blessed list should be created from the oldest possible
+ * kernel, we can't go older than v5.2, though, because that's the first
+ * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
+ * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
+ * registers won't match expectations.
  */
 #include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/wait.h>
 #include "kvm_util.h"
 #include "test_util.h"
 #include "processor.h"
 
-static struct kvm_reg_list *reg_list;
-static __u64 *blessed_reg, blessed_n;
-
-struct reg_sublist {
-       const char *name;
-       long capability;
-       int feature;
-       bool finalize;
-       __u64 *regs;
-       __u64 regs_n;
-       __u64 *rejects_set;
-       __u64 rejects_set_n;
-};
-
 struct feature_id_reg {
        __u64 reg;
        __u64 id_reg;
@@ -76,70 +43,7 @@ static struct feature_id_reg feat_id_regs[] = {
        }
 };
 
-struct vcpu_config {
-       char *name;
-       struct reg_sublist sublists[];
-};
-
-static struct vcpu_config *vcpu_configs[];
-static int vcpu_configs_n;
-
-#define for_each_sublist(c, s)                                                 \
-       for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
-
-#define for_each_reg(i)                                                                \
-       for ((i) = 0; (i) < reg_list->n; ++(i))
-
-#define for_each_reg_filtered(i)                                               \
-       for_each_reg(i)                                                         \
-               if (!filter_reg(reg_list->reg[i]))
-
-#define for_each_missing_reg(i)                                                        \
-       for ((i) = 0; (i) < blessed_n; ++(i))                                   \
-               if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))      \
-                       if (check_supported_feat_reg(vcpu, blessed_reg[i]))
-
-#define for_each_new_reg(i)                                                    \
-       for_each_reg_filtered(i)                                                \
-               if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
-
-static const char *config_name(struct vcpu_config *c)
-{
-       struct reg_sublist *s;
-       int len = 0;
-
-       if (c->name)
-               return c->name;
-
-       for_each_sublist(c, s)
-               len += strlen(s->name) + 1;
-
-       c->name = malloc(len);
-
-       len = 0;
-       for_each_sublist(c, s) {
-               if (!strcmp(s->name, "base"))
-                       continue;
-               strcat(c->name + len, s->name);
-               len += strlen(s->name) + 1;
-               c->name[len - 1] = '+';
-       }
-       c->name[len - 1] = '\0';
-
-       return c->name;
-}
-
-static bool has_cap(struct vcpu_config *c, long capability)
-{
-       struct reg_sublist *s;
-
-       for_each_sublist(c, s)
-               if (s->capability == capability)
-                       return true;
-       return false;
-}
-
-static bool filter_reg(__u64 reg)
+bool filter_reg(__u64 reg)
 {
        /*
         * DEMUX register presence depends on the host's CLIDR_EL1.
@@ -151,16 +55,6 @@ static bool filter_reg(__u64 reg)
        return false;
 }
 
-static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
-{
-       int i;
-
-       for (i = 0; i < nr_regs; ++i)
-               if (reg == regs[i])
-                       return true;
-       return false;
-}
-
 static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
 {
        int i, ret;
@@ -180,17 +74,27 @@ static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
        return true;
 }
 
-static const char *str_with_index(const char *template, __u64 index)
+bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
 {
-       char *str, *p;
-       int n;
+       return check_supported_feat_reg(vcpu, reg);
+}
 
-       str = strdup(template);
-       p = strstr(str, "##");
-       n = sprintf(p, "%lld", index);
-       strcat(p + n, strstr(template, "##") + 2);
+bool check_reject_set(int err)
+{
+       return err == EPERM;
+}
 
-       return (const char *)str;
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *s;
+       int feature;
+
+       for_each_sublist(c, s) {
+               if (s->finalize) {
+                       feature = s->feature;
+                       vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
+               }
+       }
 }
 
 #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
@@ -199,7 +103,7 @@ static const char *str_with_index(const char *template, __u64 index)
 #define CORE_SPSR_XX_NR_WORDS  2
 #define CORE_FPREGS_XX_NR_WORDS        4
 
-static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
+static const char *core_id_to_str(const char *prefix, __u64 id)
 {
        __u64 core_off = id & ~REG_MASK, idx;
 
@@ -210,8 +114,8 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
        case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
             KVM_REG_ARM_CORE_REG(regs.regs[30]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx);
-               return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
+               TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
        case KVM_REG_ARM_CORE_REG(regs.sp):
                return "KVM_REG_ARM_CORE_REG(regs.sp)";
        case KVM_REG_ARM_CORE_REG(regs.pc):
@@ -225,24 +129,24 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
        case KVM_REG_ARM_CORE_REG(spsr[0]) ...
             KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
-               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx);
-               return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
+               TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
        case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
             KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
                idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
-               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx);
-               return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
+               TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
+               return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
        case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
                return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
        case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
                return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
        }
 
-       TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id);
+       TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
        return NULL;
 }
 
-static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
+static const char *sve_id_to_str(const char *prefix, __u64 id)
 {
        __u64 sve_off, n, i;
 
@@ -252,37 +156,37 @@ static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
        sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
        i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
 
-       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id);
+       TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
 
        switch (sve_off) {
        case KVM_REG_ARM64_SVE_ZREG_BASE ...
             KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
                n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
-                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id);
-               return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
+                           "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
+               return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
        case KVM_REG_ARM64_SVE_PREG_BASE ...
             KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
                n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
-                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id);
-               return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
+                           "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
+               return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
        case KVM_REG_ARM64_SVE_FFR_BASE:
                TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
-                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
                return "KVM_REG_ARM64_SVE_FFR(0)";
        }
 
        return NULL;
 }
 
-static void print_reg(struct vcpu_config *c, __u64 id)
+void print_reg(const char *prefix, __u64 id)
 {
        unsigned op0, op1, crn, crm, op2;
        const char *reg_size = NULL;
 
        TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
-                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id);
+                   "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
 
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U8:
@@ -314,16 +218,16 @@ static void print_reg(struct vcpu_config *c, __u64 id)
                break;
        default:
                TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
-                         config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+                         prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
        }
 
        switch (id & KVM_REG_ARM_COPROC_MASK) {
        case KVM_REG_ARM_CORE:
-               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id));
+               printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
                break;
        case KVM_REG_ARM_DEMUX:
                TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
-                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
                printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
                       reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
                break;
@@ -334,370 +238,34 @@ static void print_reg(struct vcpu_config *c, __u64 id)
                crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
                op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
                TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
-                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
                printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
                break;
        case KVM_REG_ARM_FW:
                TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
-                           "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
                printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
                break;
        case KVM_REG_ARM_FW_FEAT_BMAP:
                TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
-                           "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", config_name(c), id);
+                           "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
                printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
                break;
        case KVM_REG_ARM64_SVE:
-               if (has_cap(c, KVM_CAP_ARM_SVE))
-                       printf("\t%s,\n", sve_id_to_str(c, id));
-               else
-                       TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id);
+               printf("\t%s,\n", sve_id_to_str(prefix, id));
                break;
        default:
                TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
-                         config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
-       }
-}
-
-/*
- * Older kernels listed each 32-bit word of CORE registers separately.
- * For 64 and 128-bit registers we need to ignore the extra words. We
- * also need to fixup the sizes, because the older kernels stated all
- * registers were 64-bit, even when they weren't.
- */
-static void core_reg_fixup(void)
-{
-       struct kvm_reg_list *tmp;
-       __u64 id, core_off;
-       int i;
-
-       tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));
-
-       for (i = 0; i < reg_list->n; ++i) {
-               id = reg_list->reg[i];
-
-               if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
-                       tmp->reg[tmp->n++] = id;
-                       continue;
-               }
-
-               core_off = id & ~REG_MASK;
-
-               switch (core_off) {
-               case 0x52: case 0xd2: case 0xd6:
-                       /*
-                        * These offsets are pointing at padding.
-                        * We need to ignore them too.
-                        */
-                       continue;
-               case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
-                    KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
-                       if (core_off & 3)
-                               continue;
-                       id &= ~KVM_REG_SIZE_MASK;
-                       id |= KVM_REG_SIZE_U128;
-                       tmp->reg[tmp->n++] = id;
-                       continue;
-               case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
-               case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
-                       id &= ~KVM_REG_SIZE_MASK;
-                       id |= KVM_REG_SIZE_U32;
-                       tmp->reg[tmp->n++] = id;
-                       continue;
-               default:
-                       if (core_off & 1)
-                               continue;
-                       tmp->reg[tmp->n++] = id;
-                       break;
-               }
+                         prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
        }
-
-       free(reg_list);
-       reg_list = tmp;
-}
-
-static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
-{
-       struct reg_sublist *s;
-
-       for_each_sublist(c, s)
-               if (s->capability)
-                       init->features[s->feature / 32] |= 1 << (s->feature % 32);
-}
-
-static void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_config *c)
-{
-       struct reg_sublist *s;
-       int feature;
-
-       for_each_sublist(c, s) {
-               if (s->finalize) {
-                       feature = s->feature;
-                       vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
-               }
-       }
-}
-
-static void check_supported(struct vcpu_config *c)
-{
-       struct reg_sublist *s;
-
-       for_each_sublist(c, s) {
-               if (!s->capability)
-                       continue;
-
-               __TEST_REQUIRE(kvm_has_cap(s->capability),
-                              "%s: %s not available, skipping tests\n",
-                              config_name(c), s->name);
-       }
-}
-
-static bool print_list;
-static bool print_filtered;
-static bool fixup_core_regs;
-
-static void run_test(struct vcpu_config *c)
-{
-       struct kvm_vcpu_init init = { .target = -1, };
-       int new_regs = 0, missing_regs = 0, i, n;
-       int failed_get = 0, failed_set = 0, failed_reject = 0;
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct reg_sublist *s;
-
-       check_supported(c);
-
-       vm = vm_create_barebones();
-       prepare_vcpu_init(c, &init);
-       vcpu = __vm_vcpu_add(vm, 0);
-       aarch64_vcpu_setup(vcpu, &init);
-       finalize_vcpu(vcpu, c);
-
-       reg_list = vcpu_get_reg_list(vcpu);
-
-       if (fixup_core_regs)
-               core_reg_fixup();
-
-       if (print_list || print_filtered) {
-               putchar('\n');
-               for_each_reg(i) {
-                       __u64 id = reg_list->reg[i];
-                       if ((print_list && !filter_reg(id)) ||
-                           (print_filtered && filter_reg(id)))
-                               print_reg(c, id);
-               }
-               putchar('\n');
-               return;
-       }
-
-       /*
-        * We only test that we can get the register and then write back the
-        * same value. Some registers may allow other values to be written
-        * back, but others only allow some bits to be changed, and at least
-        * for ID registers set will fail if the value does not exactly match
-        * what was returned by get. If registers that allow other values to
-        * be written need to have the other values tested, then we should
-        * create a new set of tests for those in a new independent test
-        * executable.
-        */
-       for_each_reg(i) {
-               uint8_t addr[2048 / 8];
-               struct kvm_one_reg reg = {
-                       .id = reg_list->reg[i],
-                       .addr = (__u64)&addr,
-               };
-               bool reject_reg = false;
-               int ret;
-
-               ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
-               if (ret) {
-                       printf("%s: Failed to get ", config_name(c));
-                       print_reg(c, reg.id);
-                       putchar('\n');
-                       ++failed_get;
-               }
-
-               /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
-               for_each_sublist(c, s) {
-                       if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
-                               reject_reg = true;
-                               ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
-                               if (ret != -1 || errno != EPERM) {
-                                       printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
-                                       print_reg(c, reg.id);
-                                       putchar('\n');
-                                       ++failed_reject;
-                               }
-                               break;
-                       }
-               }
-
-               if (!reject_reg) {
-                       ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
-                       if (ret) {
-                               printf("%s: Failed to set ", config_name(c));
-                               print_reg(c, reg.id);
-                               putchar('\n');
-                               ++failed_set;
-                       }
-               }
-       }
-
-       for_each_sublist(c, s)
-               blessed_n += s->regs_n;
-       blessed_reg = calloc(blessed_n, sizeof(__u64));
-
-       n = 0;
-       for_each_sublist(c, s) {
-               for (i = 0; i < s->regs_n; ++i)
-                       blessed_reg[n++] = s->regs[i];
-       }
-
-       for_each_new_reg(i)
-               ++new_regs;
-
-       for_each_missing_reg(i)
-               ++missing_regs;
-
-       if (new_regs || missing_regs) {
-               n = 0;
-               for_each_reg_filtered(i)
-                       ++n;
-
-               printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
-               printf("%s: Number registers:         %5lld (includes %lld filtered registers)\n",
-                      config_name(c), reg_list->n, reg_list->n - n);
-       }
-
-       if (new_regs) {
-               printf("\n%s: There are %d new registers.\n"
-                      "Consider adding them to the blessed reg "
-                      "list with the following lines:\n\n", config_name(c), new_regs);
-               for_each_new_reg(i)
-                       print_reg(c, reg_list->reg[i]);
-               putchar('\n');
-       }
-
-       if (missing_regs) {
-               printf("\n%s: There are %d missing registers.\n"
-                      "The following lines are missing registers:\n\n", config_name(c), missing_regs);
-               for_each_missing_reg(i)
-                       print_reg(c, blessed_reg[i]);
-               putchar('\n');
-       }
-
-       TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
-                   "%s: There are %d missing registers; "
-                   "%d registers failed get; %d registers failed set; %d registers failed reject",
-                   config_name(c), missing_regs, failed_get, failed_set, failed_reject);
-
-       pr_info("%s: PASS\n", config_name(c));
-       blessed_n = 0;
-       free(blessed_reg);
-       free(reg_list);
-       kvm_vm_free(vm);
-}
-
-static void help(void)
-{
-       struct vcpu_config *c;
-       int i;
-
-       printf(
-       "\n"
-       "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n"
-       " --config=<selection>        Used to select a specific vcpu configuration for the test/listing\n"
-       "                             '<selection>' may be\n");
-
-       for (i = 0; i < vcpu_configs_n; ++i) {
-               c = vcpu_configs[i];
-               printf(
-       "                               '%s'\n", config_name(c));
-       }
-
-       printf(
-       "\n"
-       " --list                      Print the register list rather than test it (requires --config)\n"
-       " --list-filtered             Print registers that would normally be filtered out (requires --config)\n"
-       " --core-reg-fixup            Needed when running on old kernels with broken core reg listings\n"
-       "\n"
-       );
-}
-
-static struct vcpu_config *parse_config(const char *config)
-{
-       struct vcpu_config *c;
-       int i;
-
-       if (config[8] != '=')
-               help(), exit(1);
-
-       for (i = 0; i < vcpu_configs_n; ++i) {
-               c = vcpu_configs[i];
-               if (strcmp(config_name(c), &config[9]) == 0)
-                       break;
-       }
-
-       if (i == vcpu_configs_n)
-               help(), exit(1);
-
-       return c;
-}
-
-int main(int ac, char **av)
-{
-       struct vcpu_config *c, *sel = NULL;
-       int i, ret = 0;
-       pid_t pid;
-
-       for (i = 1; i < ac; ++i) {
-               if (strcmp(av[i], "--core-reg-fixup") == 0)
-                       fixup_core_regs = true;
-               else if (strncmp(av[i], "--config", 8) == 0)
-                       sel = parse_config(av[i]);
-               else if (strcmp(av[i], "--list") == 0)
-                       print_list = true;
-               else if (strcmp(av[i], "--list-filtered") == 0)
-                       print_filtered = true;
-               else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0)
-                       help(), exit(0);
-               else
-                       help(), exit(1);
-       }
-
-       if (print_list || print_filtered) {
-               /*
-                * We only want to print the register list of a single config.
-                */
-               if (!sel)
-                       help(), exit(1);
-       }
-
-       for (i = 0; i < vcpu_configs_n; ++i) {
-               c = vcpu_configs[i];
-               if (sel && c != sel)
-                       continue;
-
-               pid = fork();
-
-               if (!pid) {
-                       run_test(c);
-                       exit(0);
-               } else {
-                       int wstatus;
-                       pid_t wpid = wait(&wstatus);
-                       TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
-                       if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
-                               ret = KSFT_FAIL;
-               }
-       }
-
-       return ret;
 }
 
 /*
- * The current blessed list was primed with the output of kernel version
+ * The original blessed list was primed with the output of kernel version
  * v4.15 with --core-reg-fixup and then later updated with new registers.
+ * (The --core-reg-fixup option and its fixup function have been removed
+ * from the test, as this type of test is unlikely to be run on a kernel
+ * older than v5.2.)
  *
  * The blessed list is up to date with kernel version v6.4 (or so we hope)
  */
@@ -1130,14 +698,14 @@ static __u64 pauth_generic_regs[] = {
                .regs_n         = ARRAY_SIZE(pauth_generic_regs),       \
        }
 
-static struct vcpu_config vregs_config = {
+static struct vcpu_reg_list vregs_config = {
        .sublists = {
        BASE_SUBLIST,
        VREGS_SUBLIST,
        {0},
        },
 };
-static struct vcpu_config vregs_pmu_config = {
+static struct vcpu_reg_list vregs_pmu_config = {
        .sublists = {
        BASE_SUBLIST,
        VREGS_SUBLIST,
@@ -1145,14 +713,14 @@ static struct vcpu_config vregs_pmu_config = {
        {0},
        },
 };
-static struct vcpu_config sve_config = {
+static struct vcpu_reg_list sve_config = {
        .sublists = {
        BASE_SUBLIST,
        SVE_SUBLIST,
        {0},
        },
 };
-static struct vcpu_config sve_pmu_config = {
+static struct vcpu_reg_list sve_pmu_config = {
        .sublists = {
        BASE_SUBLIST,
        SVE_SUBLIST,
@@ -1160,7 +728,7 @@ static struct vcpu_config sve_pmu_config = {
        {0},
        },
 };
-static struct vcpu_config pauth_config = {
+static struct vcpu_reg_list pauth_config = {
        .sublists = {
        BASE_SUBLIST,
        VREGS_SUBLIST,
@@ -1168,7 +736,7 @@ static struct vcpu_config pauth_config = {
        {0},
        },
 };
-static struct vcpu_config pauth_pmu_config = {
+static struct vcpu_reg_list pauth_pmu_config = {
        .sublists = {
        BASE_SUBLIST,
        VREGS_SUBLIST,
@@ -1178,7 +746,7 @@ static struct vcpu_config pauth_pmu_config = {
        },
 };
 
-static struct vcpu_config *vcpu_configs[] = {
+struct vcpu_reg_list *vcpu_configs[] = {
        &vregs_config,
        &vregs_pmu_config,
        &sve_config,
@@ -1186,4 +754,4 @@ static struct vcpu_config *vcpu_configs[] = {
        &pauth_config,
        &pauth_pmu_config,
 };
-static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
index bef1499..31f66ba 100644 (file)
@@ -8,7 +8,6 @@
  * hypercalls are properly masked or unmasked to the guest when disabled or
  * enabled from the KVM userspace, respectively.
  */
-
 #include <errno.h>
 #include <linux/arm-smccc.h>
 #include <asm/kvm.h>
@@ -105,15 +104,17 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
                switch (stage) {
                case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
                case TEST_STAGE_HVC_IFACE_FALSE_INFO:
-                       GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED,
-                                       res.a0, hc_info->func_id, hc_info->arg1);
+                       __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
                        break;
                case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
-                       GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED,
-                                       res.a0, hc_info->func_id, hc_info->arg1);
+                       __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                       res.a0, hc_info->func_id, hc_info->arg1, stage);
                        break;
                default:
-                       GUEST_ASSERT_1(0, stage);
+                       GUEST_FAIL("Unexpected stage = %u", stage);
                }
        }
 }
@@ -132,7 +133,7 @@ static void guest_code(void)
                        guest_test_hvc(false_hvc_info);
                        break;
                default:
-                       GUEST_ASSERT_1(0, stage);
+                       GUEST_FAIL("Unexpected stage = %u", stage);
                }
 
                GUEST_SYNC(stage);
@@ -290,10 +291,7 @@ static void test_run(void)
                        guest_done = true;
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u",
-                                             GUEST_ASSERT_ARG(uc, 0),
-                                             GUEST_ASSERT_ARG(uc, 1),
-                                             GUEST_ASSERT_ARG(uc, 2), stage);
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                default:
                        TEST_FAIL("Unexpected guest exit\n");
index df10f1f..47bb914 100644 (file)
@@ -7,7 +7,6 @@
  * hugetlbfs with a hole). It checks that the expected handling method is
  * called (e.g., uffd faults with the right address and write/read flag).
  */
-
 #define _GNU_SOURCE
 #include <linux/bitmap.h>
 #include <fcntl.h>
@@ -293,12 +292,12 @@ static void guest_code(struct test_desc *test)
 
 static void no_dabt_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(false, read_sysreg(far_el1));
+       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1));
 }
 
 static void no_iabt_handler(struct ex_regs *regs)
 {
-       GUEST_ASSERT_1(false, regs->pc);
+       GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
 }
 
 static struct uffd_args {
@@ -318,7 +317,7 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
 
        TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
                    "The only expected UFFD mode is MISSING");
-       ASSERT_EQ(addr, (uint64_t)args->hva);
+       TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
 
        pr_debug("uffd fault: addr=%p write=%d\n",
                 (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
@@ -432,7 +431,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
        region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
        hva = (void *)region->region.userspace_addr;
 
-       ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+       TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
 
        memcpy(hva, run->mmio.data, run->mmio.len);
        events.mmio_exits += 1;
@@ -631,9 +630,9 @@ static void setup_default_handlers(struct test_desc *test)
 
 static void check_event_counts(struct test_desc *test)
 {
-       ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
-       ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
-       ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+       TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+       TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+       TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
 }
 
 static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
@@ -679,7 +678,7 @@ static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
                        }
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
index 90d854e..2e64b48 100644 (file)
@@ -7,7 +7,6 @@
  * host to inject a specific intid via a GUEST_SYNC call, and then checks that
  * it received it.
  */
-
 #include <asm/kvm.h>
 #include <asm/kvm_para.h>
 #include <sys/eventfd.h>
@@ -781,7 +780,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
                        run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c
new file mode 100644 (file)
index 0000000..be7bf52
--- /dev/null
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * The blessed list should be created from the oldest possible kernel.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
+
+extern struct vcpu_reg_list *vcpu_configs[];
+extern int vcpu_configs_n;
+
+/*
+ * Iteration helpers over the vCPU's register list (reg_list) and the blessed
+ * list (blessed_reg/blessed_n). All of them expand to a 'for' header, most of
+ * them followed by one or two unbraced 'if's, so the statement that follows
+ * the macro is the (conditional) loop body.
+ */
+
+/* Every index of reg_list. */
+#define for_each_reg(i)                                                                \
+       for ((i) = 0; (i) < reg_list->n; ++(i))
+
+/* Indices of reg_list for which filter_reg() returns false. */
+#define for_each_reg_filtered(i)                                               \
+       for_each_reg(i)                                                         \
+               if (!filter_reg(reg_list->reg[i]))
+
+/*
+ * Indices of blessed_reg that are absent from reg_list and for which
+ * check_supported_reg() returns true. Uses a variable named 'vcpu' from the
+ * scope of the caller.
+ */
+#define for_each_missing_reg(i)                                                        \
+       for ((i) = 0; (i) < blessed_n; ++(i))                                   \
+               if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))      \
+                       if (check_supported_reg(vcpu, blessed_reg[i]))
+
+/* Unfiltered indices of reg_list that are absent from the blessed list. */
+#define for_each_new_reg(i)                                                    \
+       for_each_reg_filtered(i)                                                \
+               if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+/* Indices of reg_list that are also present in the blessed list. */
+#define for_each_present_blessed_reg(i)                                                \
+       for_each_reg(i)                                                         \
+               if (find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+/*
+ * Return the display name of config @c, building and caching it on first use.
+ *
+ * The name is the '+'-joined list of the config's sublist names, with any
+ * sublist called "base" left out. The result is stored in c->name and is not
+ * freed by this function.
+ */
+static const char *config_name(struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *s;
+       size_t len = 0;
+
+       if (c->name)
+               return c->name;
+
+       /* Upper bound: each sublist name plus one separator/terminator byte. */
+       for_each_sublist(c, s)
+               len += strlen(s->name) + 1;
+
+       /* +1 so a config without any nameable sublist can still hold "". */
+       c->name = malloc(len + 1);
+       TEST_ASSERT(c->name, "Failed to allocate config name");
+
+       len = 0;
+       for_each_sublist(c, s) {
+               if (!strcmp(s->name, "base"))
+                       continue;
+               /* Turn the previous name's terminator into the separator. */
+               if (len)
+                       c->name[len - 1] = '+';
+               /* strcpy, not strcat: the fresh buffer is uninitialised. */
+               strcpy(c->name + len, s->name);
+               len += strlen(s->name) + 1;
+       }
+
+       /* Nothing was copied, so there is no terminator yet. */
+       if (!len)
+               c->name[0] = '\0';
+
+       return c->name;
+}
+
+/*
+ * Default (weak) implementation: report every register as supported.
+ * Used by for_each_missing_reg() to decide whether an absent blessed register
+ * counts as missing.
+ */
+bool __weak check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+       return true;
+}
+
+/*
+ * Default (weak) implementation: filter nothing out.
+ * A true return makes for_each_reg_filtered() skip the register.
+ */
+bool __weak filter_reg(__u64 reg)
+{
+       return false;
+}
+
+/*
+ * Linear search for @reg in the first @nr_regs entries of @regs.
+ *
+ * Returns true when an entry equals @reg, false otherwise (including when
+ * @nr_regs is 0).
+ */
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+       __u64 i;        /* same type as the count: no signed/unsigned compare */
+
+       for (i = 0; i < nr_regs; ++i)
+               if (reg == regs[i])
+                       return true;
+       return false;
+}
+
+/*
+ * Default (weak) implementation: print @id in hex as one tab-indented,
+ * comma-terminated line. @prefix is not used by this default.
+ */
+void __weak print_reg(const char *prefix, __u64 id)
+{
+       printf("\t0x%llx,\n", id);
+}
+
+/*
+ * Default (weak) implementation: accept any errno value as a valid rejection.
+ * run_test() passes the errno left by a KVM_SET_ONE_REG that returned -1.
+ */
+bool __weak check_reject_set(int err)
+{
+       return true;
+}
+
+/*
+ * Default (weak) implementation: nothing to do.
+ * run_test() calls this right after the vCPU is created and before the
+ * register list is read.
+ */
+void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+}
+
+#ifdef __aarch64__
+/*
+ * Set, in @init->features, the feature bit of every sublist of @c that names
+ * a capability. Bit s->feature lives in word (s->feature / 32).
+ */
+static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init)
+{
+       struct vcpu_reg_sublist *s;
+
+       for_each_sublist(c, s)
+               if (s->capability)
+                       /* 1U: shifting a signed 1 into bit 31 is undefined. */
+                       init->features[s->feature / 32] |= 1U << (s->feature % 32);
+}
+
+/*
+ * aarch64 variant: add vCPU 0 to @vm and set it up with the feature bits that
+ * prepare_vcpu_init() derives from @c. Returns the new vCPU.
+ */
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+       /* NOTE(review): .target = -1 presumably asks the helper to pick the target - confirm. */
+       struct kvm_vcpu_init init = { .target = -1, };
+       struct kvm_vcpu *vcpu;
+
+       prepare_vcpu_init(c, &init);
+       vcpu = __vm_vcpu_add(vm, 0);
+       aarch64_vcpu_setup(vcpu, &init);
+
+       return vcpu;
+}
+#else
+/* Non-aarch64 variant: add vCPU 0 to @vm and return it; @c is not used. */
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+       return __vm_vcpu_add(vm, 0);
+}
+#endif
+
+/*
+ * Require, via __TEST_REQUIRE(), the KVM capability of every sublist of @c
+ * that names one. Sublists without a capability impose no requirement.
+ */
+static void check_supported(struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *sub;
+
+       for_each_sublist(c, sub) {
+               if (sub->capability) {
+                       __TEST_REQUIRE(kvm_has_cap(sub->capability),
+                                      "%s: %s not available, skipping tests\n",
+                                      config_name(c), sub->name);
+               }
+       }
+}
+
+static bool print_list;
+static bool print_filtered;
+
+/*
+ * Run the register-list checks for one vCPU configuration @c.
+ *
+ * Creates a VM with one vCPU for @c and reads its register list. In list mode
+ * (print_list / print_filtered) the list is printed and the function returns
+ * without running any check. Otherwise the blessed list is assembled from the
+ * sublists of @c, every register present in both lists is read and written
+ * back, new and missing registers are counted and reported, and the test
+ * asserts that nothing is missing and that no get/set/reject check failed.
+ */
+static void run_test(struct vcpu_reg_list *c)
+{
+       int new_regs = 0, missing_regs = 0, i, n;
+       int failed_get = 0, failed_set = 0, failed_reject = 0;
+       int skipped_set = 0;
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct vcpu_reg_sublist *s;
+
+       check_supported(c);
+
+       vm = vm_create_barebones();
+       vcpu = vcpu_config_get_vcpu(c, vm);
+       finalize_vcpu(vcpu, c);
+
+       reg_list = vcpu_get_reg_list(vcpu);
+
+       /* List mode: print the selected registers and stop. */
+       if (print_list || print_filtered) {
+               putchar('\n');
+               for_each_reg(i) {
+                       __u64 id = reg_list->reg[i];
+                       if ((print_list && !filter_reg(id)) ||
+                           (print_filtered && filter_reg(id)))
+                               print_reg(config_name(c), id);
+               }
+               putchar('\n');
+               return;
+       }
+
+       /* Flatten the regs[] of all sublists into one blessed array. */
+       for_each_sublist(c, s)
+               blessed_n += s->regs_n;
+       blessed_reg = calloc(blessed_n, sizeof(__u64));
+
+       n = 0;
+       for_each_sublist(c, s) {
+               for (i = 0; i < s->regs_n; ++i)
+                       blessed_reg[n++] = s->regs[i];
+       }
+
+       /*
+        * We only test that we can get the register and then write back the
+        * same value. Some registers may allow other values to be written
+        * back, but others only allow some bits to be changed, and at least
+        * for ID registers set will fail if the value does not exactly match
+        * what was returned by get. If registers that allow other values to
+        * be written need to have the other values tested, then we should
+        * create a new set of tests for those in a new independent test
+        * executable.
+        *
+        * Only do the get/set tests on present, blessed list registers,
+        * since we don't know the capabilities of any new registers.
+        */
+       for_each_present_blessed_reg(i) {
+               uint8_t addr[2048 / 8];
+               struct kvm_one_reg reg = {
+                       .id = reg_list->reg[i],
+                       .addr = (__u64)&addr,
+               };
+               bool reject_reg = false, skip_reg = false;
+               int ret;
+
+               /*
+                * NOTE(review): a failed get is only counted; the set path
+                * below still runs with whatever 'addr' holds at that point.
+                */
+               ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
+               if (ret) {
+                       printf("%s: Failed to get ", config_name(c));
+                       print_reg(config_name(c), reg.id);
+                       putchar('\n');
+                       ++failed_get;
+               }
+
+               for_each_sublist(c, s) {
+                       /* rejects_set registers are rejected for set operation */
+                       if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+                               reject_reg = true;
+                               ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+                               if (ret != -1 || !check_reject_set(errno)) {
+                                       printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+                                       print_reg(config_name(c), reg.id);
+                                       putchar('\n');
+                                       ++failed_reject;
+                               }
+                               break;
+                       }
+
+                       /* skips_set registers are skipped for set operation */
+                       if (s->skips_set && find_reg(s->skips_set, s->skips_set_n, reg.id)) {
+                               skip_reg = true;
+                               ++skipped_set;
+                               break;
+                       }
+               }
+
+               if (!reject_reg && !skip_reg) {
+                       ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+                       if (ret) {
+                               printf("%s: Failed to set ", config_name(c));
+                               print_reg(config_name(c), reg.id);
+                               putchar('\n');
+                               ++failed_set;
+                       }
+               }
+       }
+
+       /* The loop bodies only count; the filtering lives in the macros. */
+       for_each_new_reg(i)
+               ++new_regs;
+
+       for_each_missing_reg(i)
+               ++missing_regs;
+
+       if (new_regs || missing_regs) {
+               n = 0;
+               for_each_reg_filtered(i)
+                       ++n;
+
+               printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+               printf("%s: Number registers:         %5lld (includes %lld filtered registers)\n",
+                      config_name(c), reg_list->n, reg_list->n - n);
+       }
+
+       if (new_regs) {
+               printf("\n%s: There are %d new registers.\n"
+                      "Consider adding them to the blessed reg "
+                      "list with the following lines:\n\n", config_name(c), new_regs);
+               for_each_new_reg(i)
+                       print_reg(config_name(c), reg_list->reg[i]);
+               putchar('\n');
+       }
+
+       if (missing_regs) {
+               printf("\n%s: There are %d missing registers.\n"
+                      "The following lines are missing registers:\n\n", config_name(c), missing_regs);
+               for_each_missing_reg(i)
+                       print_reg(config_name(c), blessed_reg[i]);
+               putchar('\n');
+       }
+
+       /* New registers are reported above but do not fail the test. */
+       TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+                   "%s: There are %d missing registers; %d registers failed get; "
+                   "%d registers failed set; %d registers failed reject; %d registers skipped set",
+                   config_name(c), missing_regs, failed_get, failed_set, failed_reject, skipped_set);
+
+       pr_info("%s: PASS\n", config_name(c));
+       blessed_n = 0;
+       free(blessed_reg);
+       free(reg_list);
+       kvm_vm_free(vm);
+}
+
+/*
+ * Print the usage text to stdout, listing the name of every known vCPU
+ * configuration as a valid --config selection.
+ */
+static void help(void)
+{
+       int idx;
+
+       printf(
+       "\n"
+       "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered]\n\n"
+       " --config=<selection>        Used to select a specific vcpu configuration for the test/listing\n"
+       "                             '<selection>' may be\n");
+
+       /* One line per selectable configuration. */
+       for (idx = 0; idx < vcpu_configs_n; ++idx)
+               printf(
+       "                               '%s'\n", config_name(vcpu_configs[idx]));
+
+       printf(
+       "\n"
+       " --list                      Print the register list rather than test it (requires --config)\n"
+       " --list-filtered             Print registers that would normally be filtered out (requires --config)\n"
+       "\n"
+       );
+}
+
+/*
+ * Resolve a "--config=<name>" argument to the matching vCPU configuration.
+ *
+ * @config is the whole argument; the caller has already matched its first
+ * eight characters against "--config". Prints the usage text and exits with
+ * status 1 when the '=' is missing or no configuration has that name.
+ */
+static struct vcpu_reg_list *parse_config(const char *config)
+{
+       const char *wanted;
+       int idx;
+
+       if (config[8] != '=') {
+               help();
+               exit(1);
+       }
+
+       wanted = &config[9];
+       for (idx = 0; idx < vcpu_configs_n; ++idx) {
+               struct vcpu_reg_list *cand = vcpu_configs[idx];
+
+               if (!strcmp(config_name(cand), wanted))
+                       return cand;
+       }
+
+       /* No configuration carries the requested name. */
+       help();
+       exit(1);
+}
+
+/*
+ * Parse the command line, then run the selected configuration (or every
+ * configuration when none is selected), each in its own forked child.
+ *
+ * Returns 0 when every child exited with status 0 or KSFT_SKIP, and
+ * KSFT_FAIL otherwise.
+ */
+int main(int ac, char **av)
+{
+       struct vcpu_reg_list *c, *sel = NULL;
+       int i, ret = 0;
+       pid_t pid;
+
+       for (i = 1; i < ac; ++i) {
+               if (strncmp(av[i], "--config", 8) == 0)
+                       sel = parse_config(av[i]);
+               else if (strcmp(av[i], "--list") == 0)
+                       print_list = true;
+               else if (strcmp(av[i], "--list-filtered") == 0)
+                       print_filtered = true;
+               /* Test the current argument: "-h" may appear at any position. */
+               else if (strcmp(av[i], "--help") == 0 || strcmp(av[i], "-h") == 0)
+                       help(), exit(0);
+               else
+                       help(), exit(1);
+       }
+
+       if (print_list || print_filtered) {
+               /*
+                * We only want to print the register list of a single config.
+                */
+               if (!sel)
+                       help(), exit(1);
+       }
+
+       for (i = 0; i < vcpu_configs_n; ++i) {
+               c = vcpu_configs[i];
+               if (sel && c != sel)
+                       continue;
+
+               pid = fork();
+               /*
+                * Without this check a failed fork() (-1) would compare equal
+                * to the -1 that wait() returns when there is no child, and
+                * wstatus would be read uninitialised.
+                */
+               TEST_ASSERT(pid >= 0, "fork: Unexpected failure");
+
+               if (!pid) {
+                       run_test(c);
+                       exit(0);
+               } else {
+                       int wstatus;
+                       pid_t wpid = wait(&wstatus);
+                       TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+                       /* A skipped config does not fail the whole run. */
+                       if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+                               ret = KSFT_FAIL;
+               }
+       }
+
+       return ret;
+}
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
new file mode 100644 (file)
index 0000000..41230b7
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test for GUEST_PRINTF
+ *
+ * Copyright 2022, Google, Inc. and/or its affiliates.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Parameters handed from the host to guest_code() for one test case.
+ * The host-side helpers store both operands widened to uint64_t.
+ */
+struct guest_vals {
+       uint64_t a;     /* first operand */
+       uint64_t b;     /* second operand */
+       uint64_t type;  /* one of enum args_type; selects the format strings */
+};
+
+static struct guest_vals vals;
+
+/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */
+/*
+ * X-macro table of the tested argument types. Each entry is
+ * TYPE(host helper name, enum/format suffix, printf conversion, C type);
+ * users define TYPE() before expanding TYPE_LIST and #undef it afterwards.
+ */
+#define TYPE_LIST                                      \
+TYPE(test_type_i64,  I64,  "%ld",   int64_t)           \
+TYPE(test_type_u64,  U64u, "%lu",   uint64_t)          \
+TYPE(test_type_x64,  U64x, "0x%lx", uint64_t)          \
+TYPE(test_type_X64,  U64X, "0x%lX", uint64_t)          \
+TYPE(test_type_u32,  U32u, "%u",    uint32_t)          \
+TYPE(test_type_x32,  U32x, "0x%x",  uint32_t)          \
+TYPE(test_type_X32,  U32X, "0x%X",  uint32_t)          \
+TYPE(test_type_int,  INT,  "%d",    int)               \
+TYPE(test_type_char, CHAR, "%c",    char)              \
+TYPE(test_type_str,  STR,  "'%s'",  const char *)      \
+TYPE(test_type_ptr,  PTR,  "%p",    uintptr_t)
+
+/* One TYPE_<ext> enumerator per TYPE_LIST entry, in table order. */
+enum args_type {
+#define TYPE(fn, ext, fmt_t, T) TYPE_##ext,
+       TYPE_LIST
+#undef TYPE
+};
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+                    const char *expected_assert);
+
+/*
+ * For one TYPE_LIST entry, define:
+ *  - PRINTF_FMT_<ext> and ASSERT_FMT_<ext>, the format strings used by the
+ *    guest for GUEST_PRINTF() and __GUEST_ASSERT();
+ *  - the host helper fn(vcpu, a, b), which formats the expected strings with
+ *    the host's snprintf(), publishes a, b and the type to the guest through
+ *    'vals', and calls run_test() to compare the guest's output.
+ */
+#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)                    \
+const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t;    \
+const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead";  \
+static void fn(struct kvm_vcpu *vcpu, T a, T b)                                     \
+{                                                                           \
+       char expected_printf[UCALL_BUFFER_LEN];                              \
+       char expected_assert[UCALL_BUFFER_LEN];                              \
+                                                                            \
+       snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \
+       snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \
+       vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext };  \
+       sync_global_to_guest(vcpu->vm, vals);                                \
+       run_test(vcpu, expected_printf, expected_assert);                    \
+}
+
+/* Instantiate the strings and the helper for every TYPE_LIST entry. */
+#define TYPE(fn, ext, fmt_t, T) \
+               BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)
+       TYPE_LIST
+#undef TYPE
+
+/*
+ * Guest entry point. On each pass it switches on vals.type: for a known type
+ * it issues GUEST_PRINTF() with vals.a and vals.b using that type's format,
+ * then asserts vals.a == vals.b with the type's assert format; for an unknown
+ * type it issues GUEST_SYNC(vals.type). Every pass ends with GUEST_DONE().
+ */
+static void guest_code(void)
+{
+       while (1) {
+               switch (vals.type) {
+#define TYPE(fn, ext, fmt_t, T)                                                        \
+               case TYPE_##ext:                                                \
+                       GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b);         \
+                       __GUEST_ASSERT(vals.a == vals.b,                        \
+                                      ASSERT_FMT_##ext, vals.a, vals.b);       \
+                       break;
+               TYPE_LIST
+#undef TYPE
+               default:
+                       GUEST_SYNC(vals.type);
+               }
+
+               GUEST_DONE();
+       }
+}
+
+/*
+ * Check that the guest's assert message ends with the expected text.
+ *
+ * Unfortunately this gets a little messy because 'assert_msg' doesn't
+ * just contain the matching string, it also contains additional assert
+ * info.  Fortunately the part that matches should be at the very end of
+ * 'assert_msg', so only its tail is compared.
+ */
+static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
+{
+       int len_str = strlen(assert_msg);
+       int len_substr = strlen(expected_assert_msg);
+       /* Only used as an index after the length check below has passed. */
+       int offset = len_str - len_substr;
+
+       /* The expected text is the substring; the guest message contains it. */
+       TEST_ASSERT(len_substr <= len_str,
+                   "Expected '%s' to be a substring of '%s'\n",
+                   expected_assert_msg, assert_msg);
+
+       TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0,
+                   "Unexpected mismatch. Expected: '%s', got: '%s'",
+                   expected_assert_msg, &assert_msg[offset]);
+}
+
+/*
+ * Run @vcpu until the guest signals UCALL_DONE, checking every ucall on the
+ * way: UCALL_PRINTF output must equal @expected_printf, a UCALL_ABORT message
+ * must end with @expected_assert, and any other ucall fails the test.
+ */
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+                    const char *expected_assert)
+{
+       struct kvm_run *run = vcpu->run;
+       struct ucall uc;
+
+       for (;;) {
+               uint64_t cmd;
+
+               vcpu_run(vcpu);
+
+               TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+                           "Unexpected exit reason: %u (%s),\n",
+                           run->exit_reason, exit_reason_str(run->exit_reason));
+
+               cmd = get_ucall(vcpu, &uc);
+               if (cmd == UCALL_DONE)
+                       return;
+
+               if (cmd == UCALL_PRINTF) {
+                       TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0,
+                                   "Unexpected mismatch. Expected: '%s', got: '%s'",
+                                   expected_printf, uc.buffer);
+               } else if (cmd == UCALL_ABORT) {
+                       ucall_abort(uc.buffer, expected_assert);
+               } else if (cmd == UCALL_SYNC) {
+                       /* The guest only syncs when it does not know vals.type. */
+                       TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]);
+               } else {
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+       }
+}
+
+/*
+ * Guest entry point for test_limits(): print a string of 'a' characters that
+ * is longer than UCALL_BUFFER_LEN.
+ */
+static void guest_code_limits(void)
+{
+       char test_str[UCALL_BUFFER_LEN + 10];
+       const unsigned long last = sizeof(test_str) - 1;
+
+       /* Fill everything but the final byte, which holds the terminator. */
+       memset(test_str, 'a', last);
+       test_str[last] = 0;
+
+       GUEST_PRINTF("%s", test_str);
+}
+
+/*
+ * Run guest_code_limits() in a fresh VM and check that its over-long
+ * GUEST_PRINTF() is reported to the host as UCALL_ABORT.
+ */
+static void test_limits(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       uint64_t cmd;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits);
+       run = vcpu->run;
+       vcpu_run(vcpu);
+
+       TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+                   "Unexpected exit reason: %u (%s),\n",
+                   run->exit_reason, exit_reason_str(run->exit_reason));
+
+       /*
+        * Fetch the ucall before the assertion: 'uc.cmd' is also an argument
+        * of the assertion, and argument evaluation order is unspecified, so
+        * it must not be read in the same call that fills 'uc'.
+        */
+       cmd = get_ucall(vcpu, &uc);
+       TEST_ASSERT(cmd == UCALL_ABORT,
+                   "Unexpected ucall command: %lu,  Expected: %u (UCALL_ABORT)\n",
+                   uc.cmd, UCALL_ABORT);
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Create one VM running guest_code(), feed it pairs of values of every type
+ * in TYPE_LIST (equal pairs and differing pairs), then free the VM and run
+ * the separate buffer-limit test. Returns 0; failures are reported through
+ * the assertions in the helpers.
+ */
+int main(int argc, char *argv[])
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+       test_type_i64(vcpu, -1, -1);
+       test_type_i64(vcpu, -1,  1);
+       test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+       test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+       test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       test_type_u32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_u32(vcpu, 0x90abcdef, 0x90abcdee);
+       test_type_x32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_x32(vcpu, 0x90abcdef, 0x90abcdee);
+       test_type_X32(vcpu, 0x90abcdef, 0x90abcdef);
+       test_type_X32(vcpu, 0x90abcdef, 0x90abcdee);
+
+       test_type_int(vcpu, -1, -1);
+       test_type_int(vcpu, -1,  1);
+       test_type_int(vcpu,  1,  1);
+
+       test_type_char(vcpu, 'a', 'a');
+       test_type_char(vcpu, 'a', 'A');
+       test_type_char(vcpu, 'a', 'b');
+
+       test_type_str(vcpu, "foo", "foo");
+       test_type_str(vcpu, "foo", "bar");
+
+       test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+       test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+       kvm_vm_free(vm);
+
+       /* Uses its own VM, created inside test_limits(). */
+       test_limits();
+
+       return 0;
+}
index cb7c03d..b3e9752 100644 (file)
@@ -41,7 +41,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntpct_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
@@ -58,7 +58,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
                write_sysreg(cval, cntp_cval_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -72,7 +72,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntp_cval_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
@@ -89,7 +89,7 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
                write_sysreg(tval, cntp_tval_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -105,7 +105,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
                write_sysreg(ctl, cntp_ctl_el0);
                break;
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        isb();
@@ -119,7 +119,7 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer)
        case PHYSICAL:
                return read_sysreg(cntp_ctl_el0);
        default:
-               GUEST_ASSERT_1(0, timer);
+               GUEST_FAIL("Unexpected timer type = %u", timer);
        }
 
        /* We should not reach here */
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h
new file mode 100644 (file)
index 0000000..4b68f37
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+/*
+ * aarch64: issue a ucall by storing @uc to the address held in
+ * ucall_exit_mmio_addr.
+ * NOTE(review): with UCALL_EXIT_REASON defined as KVM_EXIT_MMIO, this store
+ * is presumably what causes the exit to the host - confirm.
+ */
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
index eb1ff59..a18db6a 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/kvm.h>
 #include "linux/rbtree.h"
+#include <linux/types.h>
 
 #include <asm/atomic.h>
 
@@ -124,6 +125,26 @@ struct kvm_vm {
        uint32_t memslots[NR_MEM_REGIONS];
 };
 
+/*
+ * One named group of registers inside a vCPU configuration.
+ * A sublist whose 'regs' is NULL terminates for_each_sublist().
+ */
+struct vcpu_reg_sublist {
+       const char *name;       /* used to build the configuration's name */
+       long capability;        /* KVM capability to require; 0 means none */
+       int feature;            /* bit index into kvm_vcpu_init.features (aarch64) */
+       bool finalize;          /* NOTE(review): no user visible here - confirm meaning */
+       __u64 *regs;            /* blessed register IDs */
+       __u64 regs_n;           /* number of entries in regs */
+       __u64 *rejects_set;     /* registers for which a set must be rejected */
+       __u64 rejects_set_n;    /* number of entries in rejects_set */
+       __u64 *skips_set;       /* registers for which the set test is skipped */
+       __u64 skips_set_n;      /* number of entries in skips_set */
+};
+
+/* A vCPU configuration: a name plus its array of register sublists. */
+struct vcpu_reg_list {
+       char *name;                             /* may be NULL; users can build it from the sublist names */
+       struct vcpu_reg_sublist sublists[];     /* ends with an entry whose regs is NULL */
+};
+
+/* Walk the sublists of config (c) with cursor (s) until one has regs == NULL. */
+#define for_each_sublist(c, s)         \
+       for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
 
 #define kvm_for_each_vcpu(vm, i, vcpu)                 \
        for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
index d00d213..5b62a3d 100644 (file)
@@ -38,6 +38,9 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx,
                                             KVM_REG_RISCV_TIMER_REG(name), \
                                             KVM_REG_SIZE_U64)
 
+#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \
+                                            idx, KVM_REG_SIZE_ULONG)
+
 /* L3 index Bit[47:39] */
 #define PGTBL_L3_INDEX_MASK                    0x0000FF8000000000ULL
 #define PGTBL_L3_INDEX_SHIFT                   39
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
new file mode 100644 (file)
index 0000000..be46eb3
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "processor.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_RISCV_SBI
+
+/* riscv: no per-VM ucall setup; @vm and @mmio_gpa are unused. */
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+/*
+ * riscv: issue a ucall as an SBI call to the selftests extension, passing @uc
+ * as the first argument and zero for the remaining ones.
+ */
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+                 KVM_RISCV_SELFTESTS_SBI_UCALL,
+                 uc, 0, 0, 0, 0, 0);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h
new file mode 100644 (file)
index 0000000..b231bf2
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_S390_SIEIC
+
+/* s390x: no per-VM ucall setup; @vm and @mmio_gpa are unused. */
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+/* s390x: issue a ucall with a DIAGNOSE 0x501 instruction that carries @uc. */
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
index a6e9f21..7e614ad 100644 (file)
@@ -53,14 +53,13 @@ void test_assert(bool exp, const char *exp_str,
 #define TEST_ASSERT(e, fmt, ...) \
        test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
 
-#define ASSERT_EQ(a, b) do { \
-       typeof(a) __a = (a); \
-       typeof(b) __b = (b); \
-       TEST_ASSERT(__a == __b, \
-                   "ASSERT_EQ(%s, %s) failed.\n" \
-                   "\t%s is %#lx\n" \
-                   "\t%s is %#lx", \
-                   #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+#define TEST_ASSERT_EQ(a, b)                                           \
+do {                                                                   \
+       typeof(a) __a = (a);                                            \
+       typeof(b) __b = (b);                                            \
+       test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__,       \
+                   "%#lx != %#lx (%s != %s)",                          \
+                   (unsigned long)(__a), (unsigned long)(__b), #a, #b);\
 } while (0)
 
 #define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do {               \
@@ -186,4 +185,9 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str)
        return num;
 }
 
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args);
+int guest_snprintf(char *buf, int n, const char *fmt, ...);
+
+char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
index 1a6aaef..112bc1d 100644 (file)
@@ -7,21 +7,25 @@
 #ifndef SELFTEST_KVM_UCALL_COMMON_H
 #define SELFTEST_KVM_UCALL_COMMON_H
 #include "test_util.h"
+#include "ucall.h"
 
 /* Common ucalls */
 enum {
        UCALL_NONE,
        UCALL_SYNC,
        UCALL_ABORT,
+       UCALL_PRINTF,
        UCALL_DONE,
        UCALL_UNHANDLED,
 };
 
 #define UCALL_MAX_ARGS 7
+#define UCALL_BUFFER_LEN 1024
 
 struct ucall {
        uint64_t cmd;
        uint64_t args[UCALL_MAX_ARGS];
+       char buffer[UCALL_BUFFER_LEN];
 
        /* Host virtual address of this struct. */
        struct ucall *hva;
@@ -32,8 +36,12 @@ void ucall_arch_do_ucall(vm_vaddr_t uc);
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
 
 void ucall(uint64_t cmd, int nargs, ...);
+void ucall_fmt(uint64_t cmd, const char *fmt, ...);
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+                 unsigned int line, const char *fmt, ...);
 uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
 void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
+int ucall_nr_pages_required(uint64_t page_size);
 
 /*
  * Perform userspace call without any associated data.  This bare call avoids
@@ -46,8 +54,11 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
 #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
                                ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)      ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args)
 #define GUEST_DONE()           ucall(UCALL_DONE, 0)
 
+#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer)
+
 enum guest_assert_builtin_args {
        GUEST_ERROR_STRING,
        GUEST_FILE,
@@ -55,70 +66,41 @@ enum guest_assert_builtin_args {
        GUEST_ASSERT_BUILTIN_NARGS
 };
 
-#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...)         \
-do {                                                                   \
-       if (!(_condition))                                              \
-               ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \
-                     "Failed guest assert: " _condstr,                 \
-                     __FILE__, __LINE__, ##_args);                     \
+#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...)                             \
+do {                                                                                   \
+       if (!(_condition))                                                              \
+               ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args);     \
 } while (0)
 
-#define GUEST_ASSERT(_condition) \
-       __GUEST_ASSERT(_condition, #_condition, 0, 0)
-
-#define GUEST_ASSERT_1(_condition, arg1) \
-       __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
-
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
-       __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
-
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
-       __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
-
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
-       __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
-
-#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+#define __GUEST_ASSERT(_condition, _fmt, _args...)                             \
+       ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args)
 
-#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...)                   \
-       TEST_FAIL("%s at %s:%ld\n" fmt,                                 \
-                 (const char *)(_ucall).args[GUEST_ERROR_STRING],      \
-                 (const char *)(_ucall).args[GUEST_FILE],              \
-                 (_ucall).args[GUEST_LINE],                            \
-                 ##_args)
+/*
+ * Assert @_condition in guest code with no custom message.  The stringified
+ * condition is passed as a "%s" argument rather than as the format string
+ * itself, so that a '%' inside the asserted expression (e.g. a modulo) is
+ * never parsed as a conversion specifier.
+ */
+#define GUEST_ASSERT(_condition)                                               \
+       __GUEST_ASSERT(_condition, "%s", #_condition)
 
-#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i])
+/*
+ * Unconditionally issue a UCALL_ABORT from the guest, with a printf-style
+ * message built from @_fmt and @_args and the caller's file and line.
+ */
+#define GUEST_FAIL(_fmt, _args...)                                             \
+       ucall_assert(UCALL_ABORT, "Unconditional guest failure",                \
+                    __FILE__, __LINE__, _fmt, ##_args)
 
-#define REPORT_GUEST_ASSERT(ucall)             \
-       __REPORT_GUEST_ASSERT((ucall), "")
-
-#define REPORT_GUEST_ASSERT_1(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0))
-
-#define REPORT_GUEST_ASSERT_2(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1))
-
-#define REPORT_GUEST_ASSERT_3(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1),     \
-                             GUEST_ASSERT_ARG((ucall), 2))
+/*
+ * Assert that @a == @b.  Each operand is evaluated exactly once into a local
+ * copy; on failure both values are formatted as unsigned long in hex, followed
+ * by the text of the two operands.
+ */
+#define GUEST_ASSERT_EQ(a, b)                                                  \
+do {                                                                           \
+       typeof(a) __a = (a);                                                    \
+       typeof(b) __b = (b);                                                    \
+       ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)",   \
+                        (unsigned long)(__a), (unsigned long)(__b), #a, #b);   \
+} while (0)
 
-#define REPORT_GUEST_ASSERT_4(ucall, fmt)                      \
-       __REPORT_GUEST_ASSERT((ucall),                          \
-                             fmt,                              \
-                             GUEST_ASSERT_ARG((ucall), 0),     \
-                             GUEST_ASSERT_ARG((ucall), 1),     \
-                             GUEST_ASSERT_ARG((ucall), 2),     \
-                             GUEST_ASSERT_ARG((ucall), 3))
+/*
+ * Assert that @a != @b.  Each operand is evaluated exactly once into a local
+ * copy; on failure both values are formatted as unsigned long in hex, followed
+ * by the text of the two operands.
+ */
+#define GUEST_ASSERT_NE(a, b)                                                  \
+do {                                                                           \
+       typeof(a) __a = (a);                                                    \
+       typeof(b) __b = (b);                                                    \
+       ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)",   \
+                        (unsigned long)(__a), (unsigned long)(__b), #a, #b);   \
+} while (0)
 
-#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...)     \
-       __REPORT_GUEST_ASSERT((ucall), fmt, ##args)
+/*
+ * Fail the test via test_assert(false, ...), using the expression string,
+ * file and line stored in @ucall's args[] and the formatted message stored
+ * in its buffer.
+ */
+#define REPORT_GUEST_ASSERT(ucall)                                             \
+       test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING],      \
+                   (const char *)(ucall).args[GUEST_FILE],                     \
+                   (ucall).args[GUEST_LINE], "%s", (ucall).buffer)
 
 #endif /* SELFTEST_KVM_UCALL_COMMON_H */
index aa434c8..4fd0421 100644 (file)
@@ -239,7 +239,12 @@ struct kvm_x86_cpu_property {
 #define X86_PROPERTY_MAX_BASIC_LEAF            KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
 #define X86_PROPERTY_PMU_VERSION               KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
 #define X86_PROPERTY_PMU_NR_GP_COUNTERS                KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
 #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK           KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK        KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS     KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH      KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
 
 #define X86_PROPERTY_SUPPORTED_XCR0_LO         KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
 #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0      KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h
new file mode 100644 (file)
index 0000000..06b244b
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON       KVM_EXIT_IO
+
+/*
+ * Intentionally empty: this implementation performs no setup and ignores
+ * both @vm and @mmio_gpa.
+ */
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
index b3b00be..69f26d8 100644 (file)
@@ -200,7 +200,7 @@ static void *vcpu_worker(void *data)
                if (READ_ONCE(host_quit))
                        return NULL;
 
-               clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+               clock_gettime(CLOCK_MONOTONIC, &start);
                ret = _vcpu_run(vcpu);
                ts_diff = timespec_elapsed(start);
 
@@ -367,7 +367,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        /* Test the stage of KVM creating mappings */
        *current_stage = KVM_CREATE_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
@@ -380,7 +380,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
        *current_stage = KVM_UPDATE_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
@@ -392,7 +392,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
        *current_stage = KVM_ADJUST_MAPPINGS;
 
-       clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+       clock_gettime(CLOCK_MONOTONIC, &start);
        vcpus_complete_new_stage(*current_stage);
        ts_diff = timespec_elapsed(start);
 
index f212bd8..ddab0ce 100644 (file)
@@ -6,11 +6,7 @@
  */
 #include "kvm_util.h"
 
-/*
- * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
- * VM), it must not be accessed from host code.
- */
-static vm_vaddr_t *ucall_exit_mmio_addr;
+vm_vaddr_t *ucall_exit_mmio_addr;
 
 void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
@@ -23,11 +19,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
        write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
 }
 
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       WRITE_ONCE(*ucall_exit_mmio_addr, uc);
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
new file mode 100644 (file)
index 0000000..c4a69d8
--- /dev/null
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+/*
+ * Store character @v at *@str and advance @str by one, after asserting (via
+ * GUEST_ASSERT) that @str has not reached @end.  @str is modified, so it must
+ * be an lvalue.
+ */
+#define APPEND_BUFFER_SAFE(str, end, v) \
+do {                                   \
+       GUEST_ASSERT(str < end);        \
+       *str++ = (v);                   \
+} while (0)
+
+/* Return 1 if @ch is an ASCII decimal digit ('0'..'9'), 0 otherwise. */
+static int isdigit(int ch)
+{
+       if (ch < '0')
+               return 0;
+
+       return ch <= '9';
+}
+
+/*
+ * Parse the run of decimal digits at *@s and return its value, leaving *@s
+ * pointing at the first non-digit character.  Returns 0 if *@s does not start
+ * with a digit.
+ */
+static int skip_atoi(const char **s)
+{
+       const char *cur = *s;
+       int value = 0;
+
+       for (; isdigit(*cur); cur++)
+               value = value * 10 + *cur - '0';
+
+       *s = cur;
+       return value;
+}
+
+/*
+ * Formatting flags: collected by guest_vsnprintf() while parsing a conversion
+ * and handed to number() as its 'type' argument.
+ */
+#define ZEROPAD        1               /* pad with zero */
+#define SIGN   2               /* unsigned/signed long */
+#define PLUS   4               /* show plus */
+#define SPACE  8               /* space if plus */
+#define LEFT   16              /* left justified */
+#define SMALL  32              /* Must be 32 == 0x20 */
+#define SPECIAL        64              /* 0x */
+
+/*
+ * Divide @n by @base in place and evaluate to the remainder.  @n is treated
+ * as a uint64_t and @base as a uint32_t for both operations; @n is assigned
+ * to, so it must be an lvalue.
+ */
+#define __do_div(n, base)                              \
+({                                                     \
+       int __res;                                      \
+                                                       \
+       __res = ((uint64_t) n) % (uint32_t) base;       \
+       n = ((uint64_t) n) / (uint32_t) base;           \
+       __res;                                          \
+})
+
+/*
+ * Format the integer @num in @base at @str, never writing at or beyond @end
+ * (every store goes through APPEND_BUFFER_SAFE).
+ *
+ * @size is the minimum field width, @precision the minimum number of digits
+ * and @type a mask of the ZEROPAD/SIGN/PLUS/SPACE/LEFT/SMALL/SPECIAL flags.
+ *
+ * Returns the position just past the last character written, or NULL if
+ * @base is outside 2..16.
+ */
+static char *number(char *str, const char *end, long num, int base, int size,
+                   int precision, int type)
+{
+       /* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
+       static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+       char tmp[66];
+       char c, sign, locase;
+       int i;
+
+       /*
+        * locase = 0 or 0x20. ORing digits or letters with 'locase'
+        * produces same digits or (maybe lowercased) letters
+        */
+       locase = (type & SMALL);
+       /* Left justification overrides zero padding. */
+       if (type & LEFT)
+               type &= ~ZEROPAD;
+       if (base < 2 || base > 16)
+               return NULL;
+       c = (type & ZEROPAD) ? '0' : ' ';
+       sign = 0;
+       /* A sign character, if any, takes one column of the field width. */
+       if (type & SIGN) {
+               if (num < 0) {
+                       sign = '-';
+                       num = -num;
+                       size--;
+               } else if (type & PLUS) {
+                       sign = '+';
+                       size--;
+               } else if (type & SPACE) {
+                       sign = ' ';
+                       size--;
+               }
+       }
+       /* Likewise reserve room for the "0x" or "0" prefix. */
+       if (type & SPECIAL) {
+               if (base == 16)
+                       size -= 2;
+               else if (base == 8)
+                       size--;
+       }
+       /* Generate the digits least-significant first into tmp[]. */
+       i = 0;
+       if (num == 0)
+               tmp[i++] = '0';
+       else
+               while (num != 0)
+                       tmp[i++] = (digits[__do_div(num, base)] | locase);
+       if (i > precision)
+               precision = i;
+       size -= precision;
+       /* Right-justified, space-padded: padding goes before sign/prefix. */
+       if (!(type & (ZEROPAD + LEFT)))
+               while (size-- > 0)
+                       APPEND_BUFFER_SAFE(str, end, ' ');
+       if (sign)
+               APPEND_BUFFER_SAFE(str, end, sign);
+       if (type & SPECIAL) {
+               if (base == 8)
+                       APPEND_BUFFER_SAFE(str, end, '0');
+               else if (base == 16) {
+                       APPEND_BUFFER_SAFE(str, end, '0');
+                       APPEND_BUFFER_SAFE(str, end, 'x');
+               }
+       }
+       /* Zero padding (or any remaining space padding) follows the prefix. */
+       if (!(type & LEFT))
+               while (size-- > 0)
+                       APPEND_BUFFER_SAFE(str, end, c);
+       /* Leading zeros required to reach the requested precision. */
+       while (i < precision--)
+               APPEND_BUFFER_SAFE(str, end, '0');
+       /* Emit the digits in reverse, i.e. most-significant first. */
+       while (i-- > 0)
+               APPEND_BUFFER_SAFE(str, end, tmp[i]);
+       /* Trailing spaces for left-justified output. */
+       while (size-- > 0)
+               APPEND_BUFFER_SAFE(str, end, ' ');
+
+       return str;
+}
+
+/*
+ * Minimal vsnprintf() for guest code.
+ *
+ * Formats @fmt with @args into @buf, which holds @n bytes including the
+ * terminating NUL.  Supported conversions are %c, %s, %p, %n, %%, %o, %x, %X,
+ * %d, %i and %u, with the '-', '+', ' ', '#' and '0' flags, an optional field
+ * width and precision (either may be '*'), and the 'h', 'l' and 'll' length
+ * modifiers ('L' is parsed but otherwise ignored).  An unrecognized
+ * conversion is copied to the output verbatim.
+ *
+ * Output is never truncated: running out of room trips a GUEST_ASSERT().
+ *
+ * Returns the number of characters written, excluding the terminating NUL.
+ */
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args)
+{
+       char *str, *end;
+       const char *s;
+       uint64_t num;
+       int i, base;
+       int len;
+
+       int flags;              /* flags to number() */
+
+       int field_width;        /* width of output field */
+       int precision;          /*
+                                * min. # of digits for integers; max
+                                * number of chars from string
+                                */
+       int qualifier;          /* 'h', 'l', or 'L' for integer fields */
+
+       end = buf + n;
+       GUEST_ASSERT(buf < end);
+       GUEST_ASSERT(n > 0);
+
+       for (str = buf; *fmt; ++fmt) {
+               if (*fmt != '%') {
+                       APPEND_BUFFER_SAFE(str, end, *fmt);
+                       continue;
+               }
+
+               /* process flags */
+               flags = 0;
+repeat:
+               ++fmt;          /* this also skips first '%' */
+               switch (*fmt) {
+               case '-':
+                       flags |= LEFT;
+                       goto repeat;
+               case '+':
+                       flags |= PLUS;
+                       goto repeat;
+               case ' ':
+                       flags |= SPACE;
+                       goto repeat;
+               case '#':
+                       flags |= SPECIAL;
+                       goto repeat;
+               case '0':
+                       flags |= ZEROPAD;
+                       goto repeat;
+               }
+
+               /* get field width */
+               field_width = -1;
+               if (isdigit(*fmt))
+                       field_width = skip_atoi(&fmt);
+               else if (*fmt == '*') {
+                       ++fmt;
+                       /* it's the next argument */
+                       field_width = va_arg(args, int);
+                       if (field_width < 0) {
+                               field_width = -field_width;
+                               flags |= LEFT;
+                       }
+               }
+
+               /* get the precision */
+               precision = -1;
+               if (*fmt == '.') {
+                       ++fmt;
+                       if (isdigit(*fmt))
+                               precision = skip_atoi(&fmt);
+                       else if (*fmt == '*') {
+                               ++fmt;
+                               /* it's the next argument */
+                               precision = va_arg(args, int);
+                       }
+                       if (precision < 0)
+                               precision = 0;
+               }
+
+               /* get the conversion qualifier */
+               qualifier = -1;
+               if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+                       qualifier = *fmt;
+                       ++fmt;
+               }
+
+               /*
+                * Play nice with %llu, %llx, etc.: an 'l' argument is already
+                * fetched as a 64-bit value below, so treat "ll" exactly like
+                * "l" instead of emitting the conversion verbatim and leaving
+                * its argument unconsumed.
+                */
+               if (qualifier == 'l' && *fmt == 'l')
+                       ++fmt;
+
+               /* default base */
+               base = 10;
+
+               switch (*fmt) {
+               case 'c':
+                       if (!(flags & LEFT))
+                               while (--field_width > 0)
+                                       APPEND_BUFFER_SAFE(str, end, ' ');
+                       APPEND_BUFFER_SAFE(str, end,
+                                           (uint8_t)va_arg(args, int));
+                       while (--field_width > 0)
+                               APPEND_BUFFER_SAFE(str, end, ' ');
+                       continue;
+
+               case 's':
+                       s = va_arg(args, char *);
+                       /* precision, if given, caps the number of chars copied */
+                       len = strnlen(s, precision);
+
+                       if (!(flags & LEFT))
+                               while (len < field_width--)
+                                       APPEND_BUFFER_SAFE(str, end, ' ');
+                       for (i = 0; i < len; ++i)
+                               APPEND_BUFFER_SAFE(str, end, *s++);
+                       while (len < field_width--)
+                               APPEND_BUFFER_SAFE(str, end, ' ');
+                       continue;
+
+               case 'p':
+                       /* no explicit width: zero-padded, "0x"-prefixed, lowercase */
+                       if (field_width == -1) {
+                               field_width = 2 * sizeof(void *);
+                               flags |= SPECIAL | SMALL | ZEROPAD;
+                       }
+                       str = number(str, end,
+                                    (uint64_t)va_arg(args, void *), 16,
+                                    field_width, precision, flags);
+                       continue;
+
+               case 'n':
+                       /* store the number of characters written so far */
+                       if (qualifier == 'l') {
+                               long *ip = va_arg(args, long *);
+                               *ip = (str - buf);
+                       } else {
+                               int *ip = va_arg(args, int *);
+                               *ip = (str - buf);
+                       }
+                       continue;
+
+               case '%':
+                       APPEND_BUFFER_SAFE(str, end, '%');
+                       continue;
+
+               /* integer number formats - set up the flags and "break" */
+               case 'o':
+                       base = 8;
+                       break;
+
+               case 'x':
+                       flags |= SMALL;
+                       /* fallthrough */
+               case 'X':
+                       base = 16;
+                       break;
+
+               case 'd':
+               case 'i':
+                       flags |= SIGN;
+                       /* fallthrough */
+               case 'u':
+                       break;
+
+               default:
+                       APPEND_BUFFER_SAFE(str, end, '%');
+                       if (*fmt)
+                               APPEND_BUFFER_SAFE(str, end, *fmt);
+                       else
+                               --fmt;
+                       continue;
+               }
+               if (qualifier == 'l')
+                       num = va_arg(args, uint64_t);
+               else if (qualifier == 'h') {
+                       num = (uint16_t)va_arg(args, int);
+                       if (flags & SIGN)
+                               num = (int16_t)num;
+               } else if (flags & SIGN)
+                       num = va_arg(args, int);
+               else
+                       num = va_arg(args, uint32_t);
+               str = number(str, end, num, base, field_width, precision, flags);
+       }
+
+       GUEST_ASSERT(str < end);
+       *str = '\0';
+       return str - buf;
+}
+
+/*
+ * Variadic front end for guest_vsnprintf(): formats @fmt and its arguments
+ * into @buf (@n bytes) and returns whatever guest_vsnprintf() returns.
+ */
+int guest_snprintf(char *buf, int n, const char *fmt, ...)
+{
+       va_list ap;
+       int written;
+
+       va_start(ap, fmt);
+       written = guest_vsnprintf(buf, n, fmt, ap);
+       va_end(ap);
+
+       return written;
+}
index 9741a7f..7a8af18 100644 (file)
@@ -312,6 +312,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
                                     uint32_t nr_runnable_vcpus,
                                     uint64_t extra_mem_pages)
 {
+       uint64_t page_size = vm_guest_mode_params[mode].page_size;
        uint64_t nr_pages;
 
        TEST_ASSERT(nr_runnable_vcpus,
@@ -340,6 +341,9 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
         */
        nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
 
+       /* Account for the number of pages needed by ucall. */
+       nr_pages += ucall_nr_pages_required(page_size);
+
        return vm_adjust_num_guest_pages(mode, nr_pages);
 }
 
@@ -994,7 +998,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
        if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
                alignment = max(backing_src_pagesz, alignment);
 
-       ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+       TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
 
        /* Add enough memory to align up if necessary */
        if (alignment > 1)
index 9a3476a..fe6d100 100644 (file)
 #include "kvm_util.h"
 #include "processor.h"
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
                        unsigned long arg1, unsigned long arg2,
                        unsigned long arg3, unsigned long arg4,
@@ -40,13 +36,6 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
        return ret;
 }
 
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
-                 KVM_RISCV_SELFTESTS_SBI_UCALL,
-                 uc, 0, 0, 0, 0, 0);
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
index a7f02dc..cca9873 100644 (file)
@@ -6,16 +6,6 @@
  */
 #include "kvm_util.h"
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
-       /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
-       asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
-}
-
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
index 50e0cf4..88cb6b8 100644 (file)
@@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
                                tmp = node_prev(s, nodep);
 
                        node_rm(s, nodep);
-                       nodep = NULL;
 
                        nodep = tmp;
                        reduction_performed = true;
index 632398a..5d1c872 100644 (file)
@@ -37,3 +37,12 @@ void *memset(void *s, int c, size_t count)
                *xs++ = c;
        return s;
 }
+
+/*
+ * Return the length of the string @s, examining at most @count bytes: the
+ * result is the index of the first NUL, or @count if none is found earlier.
+ */
+size_t strnlen(const char *s, size_t count)
+{
+       size_t len = 0;
+
+       while (len < count && s[len] != '\0')
+               len++;
+
+       return len;
+}
index b772193..3e36019 100644 (file)
@@ -5,6 +5,9 @@
  * Copyright (C) 2020, Google LLC.
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
 #include <assert.h>
 #include <ctype.h>
 #include <limits.h>
@@ -377,3 +380,15 @@ int atoi_paranoid(const char *num_str)
 
        return num;
 }
+
+/*
+ * Format a string, printf-style, into a newly allocated buffer.
+ *
+ * Returns the buffer allocated by vasprintf(), which the caller must free(),
+ * or NULL if vasprintf() fails.
+ */
+char *strdup_printf(const char *fmt, ...)
+{
+       va_list ap;
+       char *str;
+
+       va_start(ap, fmt);
+       /* On failure vasprintf() leaves 'str' undefined; never return it. */
+       if (vasprintf(&str, fmt, ap) < 0)
+               str = NULL;
+       va_end(ap);
+
+       return str;
+}
index 2f0e2ea..816a3fa 100644 (file)
@@ -11,6 +11,11 @@ struct ucall_header {
        struct ucall ucalls[KVM_MAX_VCPUS];
 };
 
+/*
+ * Return the number of @page_size pages needed to hold one
+ * struct ucall_header, rounding up to a whole page.
+ */
+int ucall_nr_pages_required(uint64_t page_size)
+{
+       uint64_t pool_bytes = align_up(sizeof(struct ucall_header), page_size);
+
+       return pool_bytes / page_size;
+}
+
 /*
  * ucall_pool holds per-VM values (global data is duplicated by each VM), it
  * must not be accessed from host code.
@@ -70,6 +75,45 @@ static void ucall_free(struct ucall *uc)
        clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
 }
 
+/*
+ * Issue a ucall carrying assertion details.
+ *
+ * Takes a ucall from ucall_alloc(), sets its command to @cmd, stores the
+ * @exp and @file pointers and @line in the GUEST_ERROR_STRING, GUEST_FILE and
+ * GUEST_LINE argument slots, formats @fmt and its arguments into the ucall's
+ * buffer, performs the ucall and finally releases the ucall.
+ */
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+                 unsigned int line, const char *fmt, ...)
+{
+       struct ucall *uc;
+       va_list va;
+
+       uc = ucall_alloc();
+       uc->cmd = cmd;
+
+       /* The strings are passed by address, not copied into the ucall. */
+       WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp));
+       WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file));
+       WRITE_ONCE(uc->args[GUEST_LINE], line);
+
+       va_start(va, fmt);
+       guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+       va_end(va);
+
+       /* The arch hook is handed the ucall's recorded host virtual address. */
+       ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+       ucall_free(uc);
+}
+
+/*
+ * Issue a ucall whose payload is a formatted string.
+ *
+ * Takes a ucall from ucall_alloc(), sets its command to @cmd, formats @fmt
+ * and its arguments into the ucall's buffer (UCALL_BUFFER_LEN bytes),
+ * performs the ucall and finally releases the ucall.
+ */
+void ucall_fmt(uint64_t cmd, const char *fmt, ...)
+{
+       struct ucall *uc;
+       va_list va;
+
+       uc = ucall_alloc();
+       uc->cmd = cmd;
+
+       va_start(va, fmt);
+       guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+       va_end(va);
+
+       /* The arch hook is handed the ucall's recorded host virtual address. */
+       ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+       ucall_free(uc);
+}
+
 void ucall(uint64_t cmd, int nargs, ...)
 {
        struct ucall *uc;
index d4a0b50..d828837 100644 (file)
@@ -1074,11 +1074,6 @@ static bool kvm_fixup_exception(struct ex_regs *regs)
        return true;
 }
 
-void kvm_exit_unexpected_vector(uint32_t value)
-{
-       ucall(UCALL_UNHANDLED, 1, value);
-}
-
 void route_exception(struct ex_regs *regs)
 {
        typedef void(*handler)(struct ex_regs *);
@@ -1092,7 +1087,10 @@ void route_exception(struct ex_regs *regs)
        if (kvm_fixup_exception(regs))
                return;
 
-       kvm_exit_unexpected_vector(regs->vector);
+       ucall_assert(UCALL_UNHANDLED,
+                    "Unhandled exception in guest", __FILE__, __LINE__,
+                    "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+                    regs->vector, regs->rip);
 }
 
 void vm_init_descriptor_tables(struct kvm_vm *vm)
@@ -1135,12 +1133,8 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
 {
        struct ucall uc;
 
-       if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
-               uint64_t vector = uc.args[0];
-
-               TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
-                         vector);
-       }
+       if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+               REPORT_GUEST_ASSERT(uc);
 }
 
 const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
index 4d41dc6..1265cec 100644 (file)
@@ -8,14 +8,38 @@
 
 #define UCALL_PIO_PORT ((uint16_t)0x1000)
 
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
 void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
-       asm volatile("in %[port], %%al"
-               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory");
+       /*
+        * FIXME: Revert this hack (the entire commit that added it) once nVMX
+        * preserves L2 GPRs across a nested VM-Exit.  If a ucall from L2, e.g.
+        * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+        * clobbered by L1.  Save and restore non-volatile GPRs (clobbering RBP
+        * in particular is problematic) along with RDX and RDI (which are
+        * inputs), and clobber volatile GPRs. *sigh*
+        */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+       "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+       asm volatile("push %%rbp\n\t"
+                    "push %%r15\n\t"
+                    "push %%r14\n\t"
+                    "push %%r13\n\t"
+                    "push %%r12\n\t"
+                    "push %%rbx\n\t"
+                    "push %%rdx\n\t"
+                    "push %%rdi\n\t"
+                    "in %[port], %%al\n\t"
+                    "pop %%rdi\n\t"
+                    "pop %%rdx\n\t"
+                    "pop %%rbx\n\t"
+                    "pop %%r12\n\t"
+                    "pop %%r13\n\t"
+                    "pop %%r14\n\t"
+                    "pop %%r15\n\t"
+                    "pop %%rbp\n\t"
+               : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+                    HORRIFIC_L2_UCALL_CLOBBER_HACK);
 }
 
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
index feaf2be..6628dc4 100644 (file)
@@ -55,7 +55,7 @@ static void rendezvous_with_boss(void)
 static void run_vcpu(struct kvm_vcpu *vcpu)
 {
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 }
 
 static void *vcpu_worker(void *data)
index 4210cd2..20eb2e7 100644 (file)
@@ -157,7 +157,7 @@ static void *vcpu_worker(void *__data)
                                goto done;
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_1(uc, "val = %lu");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
@@ -560,7 +560,7 @@ static void guest_code_test_memslot_rw(void)
                     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
                        uint64_t val = *(uint64_t *)ptr;
 
-                       GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
+                       GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
                        *(uint64_t *)ptr = 0;
                }
 
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
new file mode 100644 (file)
index 0000000..d8ecacd
--- /dev/null
@@ -0,0 +1,872 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (c) 2023 Intel Corporation
+ *
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK)
+
+bool filter_reg(__u64 reg)
+{
+       /*
+        * Some ISA extensions are optional and not present on all hosts,
+        * yet they cannot be disabled through their ISA_EXT register when
+        * they are present.  To keep things simple, registers of this kind
+        * are filtered out.
+        */
+       static const __u64 filtered_ids[] = {
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI,
+               KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM,
+       };
+       /* Drop the arch and size bits so only type + index are compared. */
+       const __u64 reg_id = reg & ~REG_MASK;
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(filtered_ids); i++) {
+               if (filtered_ids[i] == reg_id)
+                       return true;
+       }
+
+       return false;
+}
+
+bool check_reject_set(int err)
+{
+       /* True only when err is EINVAL. */
+       if (err != EINVAL)
+               return false;
+
+       return true;
+}
+
+/*
+ * Return true when the ISA extension register for @ext can be read from
+ * @vcpu and holds a non-zero value.  Return false otherwise, including when
+ * the read itself fails (a diagnostic line is printed in that case).
+ */
+static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext)
+{
+       int ret;
+       unsigned long value;
+
+       ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value);
+       if (ret) {
+               /* Terminate the line so later output does not run into it. */
+               printf("Failed to get ext %d\n", ext);
+               return false;
+       }
+
+       return !!value;
+}
+
+/*
+ * Put @vcpu into the ISA extension state described by register list @c:
+ * first request every extension off, then request each sublist's feature on
+ * and skip the test if the feature does not read back as enabled.
+ */
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+       struct vcpu_reg_sublist *sublist;
+       int ext;
+
+       /*
+        * Disable all extensions which were enabled by default
+        * if they were available in the risc-v host.  The result of
+        * each write is deliberately not checked.
+        */
+       for (ext = 0; ext < KVM_RISCV_ISA_EXT_MAX; ext++)
+               __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(ext), 0);
+
+       for_each_sublist(c, sublist) {
+               /* Sublists without a feature need no extension enabled. */
+               if (sublist->feature) {
+                       /* Try to enable the desired extension */
+                       __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(sublist->feature), 1);
+
+                       /* Double check whether the desired extension was enabled */
+                       __TEST_REQUIRE(vcpu_has_ext(vcpu, sublist->feature),
+                                      "%s not available, skipping tests\n", sublist->name);
+               }
+       }
+}
+
+/*
+ * Return the source-level spelling of a config register id.  Known offsets
+ * map to string literals; anything else is formatted with its raw offset
+ * and tagged UNKNOWN.
+ */
+static const char *config_id_to_str(__u64 id)
+{
+       static const struct {
+               __u64 off;
+               const char *name;
+       } known_regs[] = {
+               { KVM_REG_RISCV_CONFIG_REG(isa),
+                 "KVM_REG_RISCV_CONFIG_REG(isa)" },
+               { KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
+                 "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)" },
+               { KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
+                 "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)" },
+               { KVM_REG_RISCV_CONFIG_REG(mvendorid),
+                 "KVM_REG_RISCV_CONFIG_REG(mvendorid)" },
+               { KVM_REG_RISCV_CONFIG_REG(marchid),
+                 "KVM_REG_RISCV_CONFIG_REG(marchid)" },
+               { KVM_REG_RISCV_CONFIG_REG(mimpid),
+                 "KVM_REG_RISCV_CONFIG_REG(mimpid)" },
+               { KVM_REG_RISCV_CONFIG_REG(satp_mode),
+                 "KVM_REG_RISCV_CONFIG_REG(satp_mode)" },
+       };
+       /* cfg_off is the offset into struct kvm_riscv_config */
+       const __u64 cfg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG);
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(known_regs); i++) {
+               if (known_regs[i].off == cfg_off)
+                       return known_regs[i].name;
+       }
+
+       /*
+        * New pseudo config registers are added regularly, so show the raw
+        * offset to flag one this test does not know about yet.
+        */
+       return strdup_printf("KVM_REG_RISCV_CONFIG_REG(%lld) /* UNKNOWN */", cfg_off);
+}
+
+/*
+ * Return the source-level spelling of a core register id.
+ *
+ * @prefix: text placed at the front of the failure message
+ * @id:     full register id; arch, size and type bits are masked off here
+ *
+ * Single registers map to string literals.  Registers matched by a case
+ * range are formatted with strdup_printf(), numbering them from the start
+ * of the range plus the bias visible in each case.  An unrecognised offset
+ * fails the test.
+ */
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+       /* reg_off is the offset into struct kvm_riscv_core */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE);
+
+       switch (reg_off) {
+       case KVM_REG_RISCV_CORE_REG(regs.pc):
+               return "KVM_REG_RISCV_CORE_REG(regs.pc)";
+       case KVM_REG_RISCV_CORE_REG(regs.ra):
+               return "KVM_REG_RISCV_CORE_REG(regs.ra)";
+       case KVM_REG_RISCV_CORE_REG(regs.sp):
+               return "KVM_REG_RISCV_CORE_REG(regs.sp)";
+       case KVM_REG_RISCV_CORE_REG(regs.gp):
+               return "KVM_REG_RISCV_CORE_REG(regs.gp)";
+       case KVM_REG_RISCV_CORE_REG(regs.tp):
+               return "KVM_REG_RISCV_CORE_REG(regs.tp)";
+       /* t0..t2: index is the distance from t0 */
+       case KVM_REG_RISCV_CORE_REG(regs.t0) ... KVM_REG_RISCV_CORE_REG(regs.t2):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.t0));
+       /* s0..s1: index is the distance from s0 */
+       case KVM_REG_RISCV_CORE_REG(regs.s0) ... KVM_REG_RISCV_CORE_REG(regs.s1):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.s0));
+       /* a0..a7: index is the distance from a0 */
+       case KVM_REG_RISCV_CORE_REG(regs.a0) ... KVM_REG_RISCV_CORE_REG(regs.a7):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.a%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.a0));
+       /* s2..s11 form a second run of s registers, so numbering resumes at 2 */
+       case KVM_REG_RISCV_CORE_REG(regs.s2) ... KVM_REG_RISCV_CORE_REG(regs.s11):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.s2) + 2);
+       /* t3..t6 form a second run of t registers, so numbering resumes at 3 */
+       case KVM_REG_RISCV_CORE_REG(regs.t3) ... KVM_REG_RISCV_CORE_REG(regs.t6):
+               return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+                          reg_off - KVM_REG_RISCV_CORE_REG(regs.t3) + 3);
+       case KVM_REG_RISCV_CORE_REG(mode):
+               return "KVM_REG_RISCV_CORE_REG(mode)";
+       }
+
+       TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+/*
+ * Build the printable "<subtype> | KVM_REG_RISCV_CSR_REG(<csr>)" string for
+ * a CSR by stringifying the field name; one macro per CSR subtype.
+ */
+#define RISCV_CSR_GENERAL(csr) \
+       "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_AIA(csr) \
+       "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")"
+
+/*
+ * Return the printable name of a general CSR given its offset into
+ * struct kvm_riscv_csr.  An unrecognised offset fails the test.
+ */
+static const char *general_csr_id_to_str(__u64 reg_off)
+{
+       static const struct {
+               __u64 off;
+               const char *name;
+       } general_csrs[] = {
+               { KVM_REG_RISCV_CSR_REG(sstatus), RISCV_CSR_GENERAL(sstatus) },
+               { KVM_REG_RISCV_CSR_REG(sie), RISCV_CSR_GENERAL(sie) },
+               { KVM_REG_RISCV_CSR_REG(stvec), RISCV_CSR_GENERAL(stvec) },
+               { KVM_REG_RISCV_CSR_REG(sscratch), RISCV_CSR_GENERAL(sscratch) },
+               { KVM_REG_RISCV_CSR_REG(sepc), RISCV_CSR_GENERAL(sepc) },
+               { KVM_REG_RISCV_CSR_REG(scause), RISCV_CSR_GENERAL(scause) },
+               { KVM_REG_RISCV_CSR_REG(stval), RISCV_CSR_GENERAL(stval) },
+               { KVM_REG_RISCV_CSR_REG(sip), RISCV_CSR_GENERAL(sip) },
+               { KVM_REG_RISCV_CSR_REG(satp), RISCV_CSR_GENERAL(satp) },
+               { KVM_REG_RISCV_CSR_REG(scounteren), RISCV_CSR_GENERAL(scounteren) },
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(general_csrs); i++) {
+               if (general_csrs[i].off == reg_off)
+                       return general_csrs[i].name;
+       }
+
+       TEST_FAIL("Unknown general csr reg: 0x%llx", reg_off);
+       return NULL;
+}
+
+/*
+ * Return the printable name of an AIA CSR given its offset into
+ * struct kvm_riscv_aia_csr.  An unrecognised offset fails the test.
+ */
+static const char *aia_csr_id_to_str(__u64 reg_off)
+{
+       static const struct {
+               __u64 off;
+               const char *name;
+       } aia_csrs[] = {
+               { KVM_REG_RISCV_CSR_AIA_REG(siselect), RISCV_CSR_AIA(siselect) },
+               { KVM_REG_RISCV_CSR_AIA_REG(iprio1), RISCV_CSR_AIA(iprio1) },
+               { KVM_REG_RISCV_CSR_AIA_REG(iprio2), RISCV_CSR_AIA(iprio2) },
+               { KVM_REG_RISCV_CSR_AIA_REG(sieh), RISCV_CSR_AIA(sieh) },
+               { KVM_REG_RISCV_CSR_AIA_REG(siph), RISCV_CSR_AIA(siph) },
+               { KVM_REG_RISCV_CSR_AIA_REG(iprio1h), RISCV_CSR_AIA(iprio1h) },
+               { KVM_REG_RISCV_CSR_AIA_REG(iprio2h), RISCV_CSR_AIA(iprio2h) },
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(aia_csrs); i++) {
+               if (aia_csrs[i].off == reg_off)
+                       return aia_csrs[i].name;
+       }
+
+       TEST_FAIL("Unknown aia csr reg: 0x%llx", reg_off);
+       return NULL;
+}
+
+/*
+ * Split a CSR register id into subtype and offset and hand the offset to
+ * the matching per-subtype formatter.  An unrecognised subtype fails the
+ * test, with @prefix at the front of the message.
+ */
+static const char *csr_id_to_str(const char *prefix, __u64 id)
+{
+       const __u64 stripped = id & ~(REG_MASK | KVM_REG_RISCV_CSR);
+       const __u64 subtype = stripped & KVM_REG_RISCV_SUBTYPE_MASK;
+       const __u64 csr_off = stripped & ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       if (subtype == KVM_REG_RISCV_CSR_GENERAL)
+               return general_csr_id_to_str(csr_off);
+
+       if (subtype == KVM_REG_RISCV_CSR_AIA)
+               return aia_csr_id_to_str(csr_off);
+
+       TEST_FAIL("%s: Unknown csr subtype: 0x%llx", prefix, subtype);
+       return NULL;
+}
+
+/*
+ * Return the source-level spelling of a timer register id.  An
+ * unrecognised offset fails the test, with @prefix at the front of the
+ * message.
+ */
+static const char *timer_id_to_str(const char *prefix, __u64 id)
+{
+       static const struct {
+               __u64 off;
+               const char *name;
+       } timer_regs[] = {
+               { KVM_REG_RISCV_TIMER_REG(frequency), "KVM_REG_RISCV_TIMER_REG(frequency)" },
+               { KVM_REG_RISCV_TIMER_REG(time), "KVM_REG_RISCV_TIMER_REG(time)" },
+               { KVM_REG_RISCV_TIMER_REG(compare), "KVM_REG_RISCV_TIMER_REG(compare)" },
+               { KVM_REG_RISCV_TIMER_REG(state), "KVM_REG_RISCV_TIMER_REG(state)" },
+       };
+       /* timer_off is the offset into struct kvm_riscv_timer */
+       const __u64 timer_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER);
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(timer_regs); i++) {
+               if (timer_regs[i].off == timer_off)
+                       return timer_regs[i].name;
+       }
+
+       TEST_FAIL("%s: Unknown timer reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+/*
+ * Return the source-level spelling of an F-extension register id.
+ *
+ * @prefix: text placed at the front of the failure message
+ * @id:     full register id; arch, size and type bits are masked off here
+ *
+ * An unrecognised offset fails the test.
+ */
+static const char *fp_f_id_to_str(const char *prefix, __u64 id)
+{
+       /* reg_off is the offset into struct __riscv_f_ext_state */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F);
+
+       switch (reg_off) {
+       /*
+        * The offset itself is printed as the f[] index.
+        * NOTE(review): this presumes KVM_REG_RISCV_FP_F_REG(f[0]) is 0 - confirm.
+        */
+       case KVM_REG_RISCV_FP_F_REG(f[0]) ...
+            KVM_REG_RISCV_FP_F_REG(f[31]):
+               return strdup_printf("KVM_REG_RISCV_FP_F_REG(f[%lld])", reg_off);
+       case KVM_REG_RISCV_FP_F_REG(fcsr):
+               return "KVM_REG_RISCV_FP_F_REG(fcsr)";
+       }
+
+       TEST_FAIL("%s: Unknown fp_f reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+/*
+ * Return the source-level spelling of a D-extension register id.
+ *
+ * @prefix: text placed at the front of the failure message
+ * @id:     full register id; arch, size and type bits are masked off here
+ *
+ * An unrecognised offset fails the test.
+ */
+static const char *fp_d_id_to_str(const char *prefix, __u64 id)
+{
+       /* reg_off is the offset into struct __riscv_d_ext_state */
+       __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D);
+
+       switch (reg_off) {
+       /*
+        * The offset itself is printed as the f[] index.
+        * NOTE(review): this presumes KVM_REG_RISCV_FP_D_REG(f[0]) is 0 - confirm.
+        */
+       case KVM_REG_RISCV_FP_D_REG(f[0]) ...
+            KVM_REG_RISCV_FP_D_REG(f[31]):
+               return strdup_printf("KVM_REG_RISCV_FP_D_REG(f[%lld])", reg_off);
+       case KVM_REG_RISCV_FP_D_REG(fcsr):
+               return "KVM_REG_RISCV_FP_D_REG(fcsr)";
+       }
+
+       TEST_FAIL("%s: Unknown fp_d reg id: 0x%llx", prefix, id);
+       return NULL;
+}
+
+/*
+ * Return the KVM_RISCV_ISA_EXT_* name for an ISA extension register id, or
+ * the raw index tagged UNKNOWN when the index lies beyond the name table.
+ */
+static const char *isa_ext_id_to_str(__u64 id)
+{
+       static const char * const ext_names[] = {
+               "KVM_RISCV_ISA_EXT_A",
+               "KVM_RISCV_ISA_EXT_C",
+               "KVM_RISCV_ISA_EXT_D",
+               "KVM_RISCV_ISA_EXT_F",
+               "KVM_RISCV_ISA_EXT_H",
+               "KVM_RISCV_ISA_EXT_I",
+               "KVM_RISCV_ISA_EXT_M",
+               "KVM_RISCV_ISA_EXT_SVPBMT",
+               "KVM_RISCV_ISA_EXT_SSTC",
+               "KVM_RISCV_ISA_EXT_SVINVAL",
+               "KVM_RISCV_ISA_EXT_ZIHINTPAUSE",
+               "KVM_RISCV_ISA_EXT_ZICBOM",
+               "KVM_RISCV_ISA_EXT_ZICBOZ",
+               "KVM_RISCV_ISA_EXT_ZBB",
+               "KVM_RISCV_ISA_EXT_SSAIA",
+               "KVM_RISCV_ISA_EXT_V",
+               "KVM_RISCV_ISA_EXT_SVNAPOT",
+               "KVM_RISCV_ISA_EXT_ZBA",
+               "KVM_RISCV_ISA_EXT_ZBS",
+               "KVM_RISCV_ISA_EXT_ZICNTR",
+               "KVM_RISCV_ISA_EXT_ZICSR",
+               "KVM_RISCV_ISA_EXT_ZIFENCEI",
+               "KVM_RISCV_ISA_EXT_ZIHPM",
+       };
+       /* ext_idx is the offset into unsigned long kvm_isa_ext_arr[] */
+       const __u64 ext_idx = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT);
+
+       if (ext_idx < ARRAY_SIZE(ext_names))
+               return ext_names[ext_idx];
+
+       /*
+        * New ISA extensions are added regularly, so print the raw index
+        * to flag an extension this table does not know about yet.
+        */
+       return strdup_printf("%lld /* UNKNOWN */", ext_idx);
+}
+
+/*
+ * Return the printable "KVM_REG_RISCV_SBI_SINGLE | <ext>" string for an SBI
+ * extension id, or the raw id tagged UNKNOWN when it lies beyond the name
+ * table.
+ */
+static const char *sbi_ext_single_id_to_str(__u64 reg_off)
+{
+       /* Indexed by KVM_RISCV_SBI_EXT_ID, which is what reg_off carries */
+       static const char * const sbi_names[] = {
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL",
+               "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR",
+       };
+
+       if (reg_off < ARRAY_SIZE(sbi_names))
+               return sbi_names[reg_off];
+
+       /*
+        * New SBI extensions are added regularly, so print the raw id to
+        * flag an extension this table does not know about yet.
+        */
+       return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off);
+}
+
+/*
+ * Format an SBI multi-extension register as "<subtype> | <index>".
+ *
+ * @reg_subtype: KVM_REG_RISCV_SBI_MULTI_EN or KVM_REG_RISCV_SBI_MULTI_DIS
+ * @reg_off:     register index within that subtype
+ *
+ * An index above KVM_REG_RISCV_SBI_MULTI_REG_LAST is still printed with its
+ * subtype and merely tagged UNKNOWN, so the output stays unambiguous and
+ * matches the form used for single SBI extension registers.
+ *
+ * Returns NULL for any other subtype.
+ */
+static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
+{
+       const char *unknown = "";
+
+       /*
+        * New SBI extensions are added regularly, so an index past the last
+        * known register is flagged rather than treated as an error.
+        */
+       if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
+               unknown = " /* UNKNOWN */";
+
+       switch (reg_subtype) {
+       case KVM_REG_RISCV_SBI_MULTI_EN:
+               return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld%s", reg_off, unknown);
+       case KVM_REG_RISCV_SBI_MULTI_DIS:
+               return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld%s", reg_off, unknown);
+       }
+
+       return NULL;
+}
+
+/*
+ * Split an SBI extension register id into subtype and offset and hand both
+ * to the matching formatter.  An unrecognised subtype fails the test, with
+ * @prefix at the front of the message.
+ */
+static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
+{
+       const __u64 stripped = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT);
+       const __u64 subtype = stripped & KVM_REG_RISCV_SUBTYPE_MASK;
+       const __u64 ext_off = stripped & ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+       if (subtype == KVM_REG_RISCV_SBI_SINGLE)
+               return sbi_ext_single_id_to_str(ext_off);
+
+       if (subtype == KVM_REG_RISCV_SBI_MULTI_EN ||
+           subtype == KVM_REG_RISCV_SBI_MULTI_DIS)
+               return sbi_ext_multi_id_to_str(subtype, ext_off);
+
+       TEST_FAIL("%s: Unknown sbi ext subtype: 0x%llx", prefix, subtype);
+       return NULL;
+}
+
+/*
+ * Print register @id as a C initializer line, spelled with the KVM_REG_*
+ * macros:
+ *
+ *     <tab>KVM_REG_RISCV | <size> | <type> | <register>,
+ *
+ * @prefix is placed at the front of any failure message.  The test fails if
+ * the id lacks KVM_REG_RISCV, has an unexpected size, or has an unexpected
+ * register type.
+ */
+void print_reg(const char *prefix, __u64 id)
+{
+       const __u64 size_bits = id & KVM_REG_SIZE_MASK;
+       const __u64 type_bits = id & KVM_REG_RISCV_TYPE_MASK;
+       const char *reg_size = NULL;
+       const char *reg_name;
+
+       TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_RISCV,
+                   "%s: KVM_REG_RISCV missing in reg id: 0x%llx", prefix, id);
+
+       if (size_bits == KVM_REG_SIZE_U32) {
+               reg_size = "KVM_REG_SIZE_U32";
+       } else if (size_bits == KVM_REG_SIZE_U64) {
+               reg_size = "KVM_REG_SIZE_U64";
+       } else if (size_bits == KVM_REG_SIZE_U128) {
+               reg_size = "KVM_REG_SIZE_U128";
+       } else {
+               TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+                         prefix, size_bits >> KVM_REG_SIZE_SHIFT, id);
+       }
+
+       /* Resolve the register name first, then emit the line for its type. */
+       switch (type_bits) {
+       case KVM_REG_RISCV_CONFIG:
+               reg_name = config_id_to_str(id);
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n",
+                      reg_size, reg_name);
+               break;
+       case KVM_REG_RISCV_CORE:
+               reg_name = core_id_to_str(prefix, id);
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n",
+                      reg_size, reg_name);
+               break;
+       case KVM_REG_RISCV_CSR:
+               reg_name = csr_id_to_str(prefix, id);
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CSR | %s,\n",
+                      reg_size, reg_name);
+               break;
+       case KVM_REG_RISCV_TIMER:
+               reg_name = timer_id_to_str(prefix, id);
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_TIMER | %s,\n",
+                      reg_size, reg_name);
+               break;
+       case KVM_REG_RISCV_FP_F:
+               reg_name = fp_f_id_to_str(prefix, id);
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_F | %s,\n",
+                      reg_size, reg_name);
+               break;
+       case KVM_REG_RISCV_FP_D:
+               reg_name = fp_d_id_to_str(prefix, id);
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
+                      reg_size, reg_name);
+               break;
+       case KVM_REG_RISCV_ISA_EXT:
+               reg_name = isa_ext_id_to_str(id);
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
+                      reg_size, reg_name);
+               break;
+       case KVM_REG_RISCV_SBI_EXT:
+               reg_name = sbi_ext_id_to_str(prefix, id);
+               printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n",
+                      reg_size, reg_name);
+               break;
+       default:
+               TEST_FAIL("%s: Unexpected reg type: 0x%llx in reg id: 0x%llx", prefix,
+                         type_bits >> KVM_REG_RISCV_TYPE_SHIFT, id);
+       }
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v6.5-rc3 and then later updated with new registers.
+ *
+ * Entries are grouped by register type: config, core, general CSRs, timer,
+ * ISA extensions, then SBI extensions.
+ */
+static __u64 base_regs[] = {
+       /* Config registers */
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode),
+       /* Core registers */
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.gp),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.tp),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t0),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t1),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t2),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s0),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s1),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a0),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a1),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a2),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a3),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a4),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a5),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a6),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a7),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s2),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s3),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s4),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s5),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s6),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s7),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s8),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s9),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s10),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s11),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t3),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t4),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t5),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t6),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(mode),
+       /* General CSRs */
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sstatus),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sie),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stvec),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sscratch),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sepc),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scause),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stval),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren),
+       /* Timer registers (always 64-bit) */
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+       /* ISA extension registers */
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M,
+       /* SBI extension registers: single-extension ids, then the multi EN/DIS words */
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0,
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0,
+};
+
+/*
+ * The skips_set list holds the registers for which the set test is skipped.
+ *  - KVM_REG_RISCV_TIMER_REG(state): the set would fail if the timer was not
+ *    initialized properly.
+ */
+static __u64 base_skips_set[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+};
+
+/*
+ * Per-extension register lists.  Each array holds the ISA_EXT register id
+ * of one extension; zicbom_regs and zicboz_regs additionally carry the
+ * matching block-size config register.
+ * NOTE(review): presumably referenced by sublist definitions further down
+ * the file - confirm there.
+ */
+static __u64 h_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H,
+};
+
+static __u64 zicbom_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM,
+};
+
+static __u64 zicboz_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ,
+};
+
+static __u64 svpbmt_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT,
+};
+
+static __u64 sstc_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC,
+};
+
+static __u64 svinval_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL,
+};
+
+static __u64 zihintpause_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+};
+
+static __u64 zba_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA,
+};
+
+static __u64 zbb_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB,
+};
+
+static __u64 zbs_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS,
+};
+
+static __u64 zicntr_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR,
+};
+
+static __u64 zicsr_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR,
+};
+
+static __u64 zifencei_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI,
+};
+
+static __u64 zihpm_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM,
+};
+
+/*
+ * Register ids tied to the SSAIA extension: the seven AIA CSRs followed by
+ * the extension's own ISA_EXT register.
+ */
+static __u64 aia_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA,
+};
+
+/*
+ * Register ids tied to the F extension: the 32-bit f[0]..f[31] registers
+ * and fcsr, followed by the extension's own ISA_EXT register.
+ */
+static __u64 fp_f_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[2]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[3]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[4]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[5]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[6]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[7]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[8]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[9]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[10]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[11]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[12]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[13]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[14]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[15]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[16]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[17]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[18]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[19]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[20]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[21]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[22]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[23]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[24]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[25]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[26]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[27]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[28]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[29]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F,
+};
+
+static __u64 fp_d_regs[] = {
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[0]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[1]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[2]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[3]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[4]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[5]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[6]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[7]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[8]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[9]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[10]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[11]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[12]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[13]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[14]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[15]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[16]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[17]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[18]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[19]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[20]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[21]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[22]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[23]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[24]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[25]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[26]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[27]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[28]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[29]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]),
+       KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr),
+       KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D,
+};
+
+#define BASE_SUBLIST \
+       {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
+        .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
+#define H_REGS_SUBLIST \
+       {"h", .feature = KVM_RISCV_ISA_EXT_H, .regs = h_regs, .regs_n = ARRAY_SIZE(h_regs),}
+#define ZICBOM_REGS_SUBLIST \
+       {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
+#define ZICBOZ_REGS_SUBLIST \
+       {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
+#define SVPBMT_REGS_SUBLIST \
+       {"svpbmt", .feature = KVM_RISCV_ISA_EXT_SVPBMT, .regs = svpbmt_regs, .regs_n = ARRAY_SIZE(svpbmt_regs),}
+#define SSTC_REGS_SUBLIST \
+       {"sstc", .feature = KVM_RISCV_ISA_EXT_SSTC, .regs = sstc_regs, .regs_n = ARRAY_SIZE(sstc_regs),}
+#define SVINVAL_REGS_SUBLIST \
+       {"svinval", .feature = KVM_RISCV_ISA_EXT_SVINVAL, .regs = svinval_regs, .regs_n = ARRAY_SIZE(svinval_regs),}
+#define ZIHINTPAUSE_REGS_SUBLIST \
+       {"zihintpause", .feature = KVM_RISCV_ISA_EXT_ZIHINTPAUSE, .regs = zihintpause_regs, .regs_n = ARRAY_SIZE(zihintpause_regs),}
+#define ZBA_REGS_SUBLIST \
+       {"zba", .feature = KVM_RISCV_ISA_EXT_ZBA, .regs = zba_regs, .regs_n = ARRAY_SIZE(zba_regs),}
+#define ZBB_REGS_SUBLIST \
+       {"zbb", .feature = KVM_RISCV_ISA_EXT_ZBB, .regs = zbb_regs, .regs_n = ARRAY_SIZE(zbb_regs),}
+#define ZBS_REGS_SUBLIST \
+       {"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),}
+#define ZICNTR_REGS_SUBLIST \
+       {"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),}
+#define ZICSR_REGS_SUBLIST \
+       {"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),}
+#define ZIFENCEI_REGS_SUBLIST \
+       {"zifencei", .feature = KVM_RISCV_ISA_EXT_ZIFENCEI, .regs = zifencei_regs, .regs_n = ARRAY_SIZE(zifencei_regs),}
+#define ZIHPM_REGS_SUBLIST \
+       {"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),}
+#define AIA_REGS_SUBLIST \
+       {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),}
+#define FP_F_REGS_SUBLIST \
+       {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \
+               .regs_n = ARRAY_SIZE(fp_f_regs),}
+#define FP_D_REGS_SUBLIST \
+       {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
+               .regs_n = ARRAY_SIZE(fp_d_regs),}
+
+static struct vcpu_reg_list h_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       H_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zicbom_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZICBOM_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zicboz_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZICBOZ_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list svpbmt_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVPBMT_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list sstc_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SSTC_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list svinval_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       SVINVAL_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zihintpause_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZIHINTPAUSE_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zba_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZBA_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zbb_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZBB_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zbs_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZBS_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zicntr_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZICNTR_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zicsr_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZICSR_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zifencei_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZIFENCEI_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list zihpm_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       ZIHPM_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list aia_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       AIA_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list fp_f_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       FP_F_REGS_SUBLIST,
+       {0},
+       },
+};
+
+static struct vcpu_reg_list fp_d_config = {
+       .sublists = {
+       BASE_SUBLIST,
+       FP_D_REGS_SUBLIST,
+       {0},
+       },
+};
+
+struct vcpu_reg_list *vcpu_configs[] = {
+       &h_config,
+       &zicbom_config,
+       &zicboz_config,
+       &svpbmt_config,
+       &sstc_config,
+       &svinval_config,
+       &zihintpause_config,
+       &zba_config,
+       &zbb_config,
+       &zbs_config,
+       &zicntr_config,
+       &zicsr_config,
+       &zifencei_config,
+       &zihpm_config,
+       &aia_config,
+       &fp_f_config,
+       &fp_d_config,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
index 1d73e78..c8e0a64 100644 (file)
@@ -237,8 +237,8 @@ static void test_get_cmma_basic(void)
 
        /* GET_CMMA_BITS without CMMA enabled should fail */
        rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, ENXIO);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, ENXIO);
 
        enable_cmma(vm);
        vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
@@ -247,31 +247,31 @@ static void test_get_cmma_basic(void)
 
        /* GET_CMMA_BITS without migration mode and without peeking should fail */
        rc = vm_get_cmma_bits(vm, 0, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
 
        /* GET_CMMA_BITS without migration mode and with peeking should work */
        rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
-       ASSERT_EQ(rc, 0);
-       ASSERT_EQ(errno_out, 0);
+       TEST_ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(errno_out, 0);
 
        enable_dirty_tracking(vm);
        enable_migration_mode(vm);
 
        /* GET_CMMA_BITS with invalid flags */
        rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno_out, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno_out, EINVAL);
 
        kvm_vm_free(vm);
 }
 
 static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
 {
-       ASSERT_EQ(vcpu->run->exit_reason, 13);
-       ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
-       ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
-       ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+       TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
 }
 
 static void test_migration_mode(void)
@@ -283,8 +283,8 @@ static void test_migration_mode(void)
 
        /* enabling migration mode on a VM without memory should fail */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
        TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
        errno = 0;
 
@@ -304,8 +304,8 @@ static void test_migration_mode(void)
 
        /* migration mode when memslots have dirty tracking off should fail */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, -1);
-       ASSERT_EQ(errno, EINVAL);
+       TEST_ASSERT_EQ(rc, -1);
+       TEST_ASSERT_EQ(errno, EINVAL);
        TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
        errno = 0;
 
@@ -314,7 +314,7 @@ static void test_migration_mode(void)
 
        /* enabling migration mode should work now */
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(rc, 0);
        TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
        errno = 0;
 
@@ -350,7 +350,7 @@ static void test_migration_mode(void)
         */
        vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
        rc = __enable_migration_mode(vm);
-       ASSERT_EQ(rc, 0);
+       TEST_ASSERT_EQ(rc, 0);
        TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
        errno = 0;
 
@@ -394,9 +394,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
-       ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
-       ASSERT_EQ(args.start_gfn, 0);
+       TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
 
        /* ...and then - after a hole - the TEST_DATA memslot should follow */
        args = (struct kvm_s390_cmma_log){
@@ -407,9 +407,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
-       ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
-       ASSERT_EQ(args.remaining, 0);
+       TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+       TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+       TEST_ASSERT_EQ(args.remaining, 0);
 
        /* ...and nothing else should be there */
        args = (struct kvm_s390_cmma_log){
@@ -420,9 +420,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
        };
        memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
        vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
-       ASSERT_EQ(args.count, 0);
-       ASSERT_EQ(args.start_gfn, 0);
-       ASSERT_EQ(args.remaining, 0);
+       TEST_ASSERT_EQ(args.count, 0);
+       TEST_ASSERT_EQ(args.start_gfn, 0);
+       TEST_ASSERT_EQ(args.remaining, 0);
 }
 
 /**
@@ -498,11 +498,11 @@ static void assert_cmma_dirty(u64 first_dirty_gfn,
                              u64 dirty_gfn_count,
                              const struct kvm_s390_cmma_log *res)
 {
-       ASSERT_EQ(res->start_gfn, first_dirty_gfn);
-       ASSERT_EQ(res->count, dirty_gfn_count);
+       TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+       TEST_ASSERT_EQ(res->count, dirty_gfn_count);
        for (size_t i = 0; i < dirty_gfn_count; i++)
-               ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
-       ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+               TEST_ASSERT_EQ(cmma_value_buf[i], 0x0); /* every reported page: stable state */
+       TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
 }
 
 static void test_get_skip_holes(void)
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c
new file mode 100644 (file)
index 0000000..84313fb
--- /dev/null
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[]; /* guest code below; its address is used as the new PSW address */
+asm("int_handler:\n"
+    "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+                                     size_t new_psw_off, uint64_t *new_psw)
+{ /* Build a one-vCPU VM, install a new PSW at new_psw_off, run one step; returns the VM. */
+       struct kvm_guest_debug debug = {};
+       struct kvm_regs regs;
+       struct kvm_vm *vm;
+       char *lowcore;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       lowcore = addr_gpa2hva(vm, 0); /* host address of guest physical address 0 */
+       new_psw[0] = (*vcpu)->run->psw_mask; /* keep the vCPU's current PSW mask */
+       new_psw[1] = (uint64_t)int_handler; /* new PSW address: int_handler */
+       memcpy(lowcore + new_psw_off, new_psw, 16); /* 16 bytes = both 64-bit PSW words */
+       vcpu_regs_get(*vcpu, &regs);
+       regs.gprs[2] = -1; /* NOTE(review): presumably the operand for "iske %r2,%r2" - confirm */
+       vcpu_regs_set(*vcpu, &regs);
+       debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+       vcpu_guest_debug_set(*vcpu, &debug);
+       vcpu_run(*vcpu); /* single run; the caller inspects the resulting exit */
+
+       return vm; /* callers release it with kvm_vm_free() */
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{ /* Run guest_code via test_step_int_1() and expect a debug exit at the installed new PSW. */
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2]; /* [0] = PSW mask, [1] = PSW address, filled by test_step_int_1() */
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); /* PSW must equal the installed new PSW */
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+    ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+    "j .\n");
+
+static void test_step_pgm(void)
+{
+       test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+    "diag %r0,%r0,0\n"
+    "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+       struct kvm_s390_irq irq = { /* program interrupt injected by this test after the intercept */
+               .type = KVM_S390_PROGRAM_INT,
+               .u.pgm.code = PGM_SPECIFICATION,
+       };
+       struct kvm_vcpu *vcpu;
+       uint64_t new_psw[2];
+       struct kvm_vm *vm;
+
+       vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+                            __LC_PGM_NEW_PSW, new_psw);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); /* first exit: instruction intercept */
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION);
+       TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); /* compare only the bits under mask 0xff00 */
+       vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); /* inject the program interrupt from the host side */
+       vcpu_run(vcpu); /* resume; the debug exit is expected now */
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+       TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+       TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+       kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+    "iske %r2,%r2\n"
+    "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+       test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+    "lctl %c0,%c0,1\n"
+    "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+       test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+    "svc 0\n"
+    "j .\n");
+
+static void test_step_svc(void)
+{
+       test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+       const char *name;
+       void (*test)(void);
+} testlist[] = {
+       { "single-step pgm", test_step_pgm },
+       { "single-step pgm caused by diag", test_step_pgm_diag },
+       { "single-step pgm caused by iske", test_step_pgm_iske },
+       { "single-step pgm caused by lctl", test_step_pgm_lctl },
+       { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{ /* Run every entry of testlist in order and report each one through the ksft_* helpers. */
+       int idx;
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(testlist)); /* planned test count = number of testlist entries */
+       for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+               testlist[idx].test();
+               ksft_test_result_pass("%s\n", testlist[idx].name); /* reported once test() has returned */
+       }
+       ksft_finished();
+}
index 8e4b94d..bb3ca9a 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright (C) 2019, Red Hat, Inc.
  */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -279,10 +278,10 @@ enum stage {
        vcpu_run(__vcpu);                                               \
        get_ucall(__vcpu, &uc);                                         \
        if (uc.cmd == UCALL_ABORT) {                                    \
-               REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu");           \
+               REPORT_GUEST_ASSERT(uc);                                \
        }                                                               \
-       ASSERT_EQ(uc.cmd, UCALL_SYNC);                                  \
-       ASSERT_EQ(uc.args[1], __stage);                                 \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                             \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                            \
 })                                                                     \
 
 static void prepare_mem12(void)
@@ -469,7 +468,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val)
        case 16:
                return val;
        }
-       GUEST_ASSERT_1(false, "Invalid size");
+       GUEST_FAIL("Invalid size = %u", size);
        return 0;
 }
 
@@ -598,7 +597,7 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t
                        return ret;
                }
        }
-       GUEST_ASSERT_1(false, "Invalid size");
+       GUEST_FAIL("Invalid size = %u", size);
        return 0;
 }
 
@@ -808,7 +807,7 @@ static void test_termination(void)
        HOST_SYNC(t.vcpu, STAGE_IDLED);
        MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
        /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
-       ASSERT_EQ(teid & teid_mask, 0);
+       TEST_ASSERT_EQ(teid & teid_mask, 0);
 
        kvm_vm_free(t.kvm_vm);
 }
index a9a0b76..c73f948 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright IBM Corp. 2021
  */
-
 #include <sys/mman.h>
 #include "test_util.h"
 #include "kvm_util.h"
@@ -156,7 +155,9 @@ static enum stage perform_next_stage(int *i, bool mapped_0)
                       !mapped_0;
                if (!skip) {
                        result = test_protection(tests[*i].addr, tests[*i].key);
-                       GUEST_ASSERT_2(result == tests[*i].expected, *i, result);
+                       __GUEST_ASSERT(result == tests[*i].expected,
+                                      "Wanted %u, got %u, for i = %u",
+                                      tests[*i].expected, result, *i);
                }
        }
        return stage;
@@ -190,9 +191,9 @@ static void guest_code(void)
        vcpu_run(__vcpu);                                       \
        get_ucall(__vcpu, &uc);                                 \
        if (uc.cmd == UCALL_ABORT)                              \
-               REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu");   \
-       ASSERT_EQ(uc.cmd, UCALL_SYNC);                          \
-       ASSERT_EQ(uc.args[1], __stage);                         \
+               REPORT_GUEST_ASSERT(uc);                        \
+       TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC);                     \
+       TEST_ASSERT_EQ(uc.args[1], __stage);                    \
 })
 
 #define HOST_SYNC(vcpu, stage)                 \
index a849ce2..b329601 100644 (file)
@@ -88,7 +88,7 @@ static void *vcpu_worker(void *data)
        }
 
        if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
-               REPORT_GUEST_ASSERT_1(uc, "val = %lu");
+               REPORT_GUEST_ASSERT(uc);
 
        return NULL;
 }
@@ -156,19 +156,22 @@ static void guest_code_move_memory_region(void)
         * window where the memslot is invalid is usually quite small.
         */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+       __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+                      "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
 
        /* Spin until the misaligning memory region move completes. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 1 || val == 0, val);
+       __GUEST_ASSERT(val == 1 || val == 0,
+                      "Expected '0' or '1' (no MMIO), got '%llx'", val);
 
        /* Spin until the memory region starts to get re-aligned. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+       __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+                      "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
 
        /* Spin until the re-aligning memory region move completes. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 1, val);
+       GUEST_ASSERT_EQ(val, 1);
 
        GUEST_DONE();
 }
@@ -224,15 +227,15 @@ static void guest_code_delete_memory_region(void)
 
        /* Spin until the memory region is deleted. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == MMIO_VAL, val);
+       GUEST_ASSERT_EQ(val, MMIO_VAL);
 
        /* Spin until the memory region is recreated. */
        val = guest_spin_on_val(MMIO_VAL);
-       GUEST_ASSERT_1(val == 0, val);
+       GUEST_ASSERT_EQ(val, 0);
 
        /* Spin until the memory region is deleted. */
        val = guest_spin_on_val(0);
-       GUEST_ASSERT_1(val == MMIO_VAL, val);
+       GUEST_ASSERT_EQ(val, MMIO_VAL);
 
        asm("1:\n\t"
            ".pushsection .rodata\n\t"
@@ -249,7 +252,7 @@ static void guest_code_delete_memory_region(void)
            "final_rip_end: .quad 1b\n\t"
            ".popsection");
 
-       GUEST_ASSERT_1(0, 0);
+       GUEST_ASSERT(0);
 }
 
 static void test_delete_memory_region(void)
index c87f387..171adfb 100644 (file)
@@ -31,8 +31,8 @@ static uint64_t guest_stolen_time[NR_VCPUS];
 static void check_status(struct kvm_steal_time *st)
 {
        GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
-       GUEST_ASSERT(READ_ONCE(st->flags) == 0);
-       GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
 }
 
 static void guest_code(int cpu)
@@ -40,7 +40,7 @@ static void guest_code(int cpu)
        struct kvm_steal_time *st = st_gva[cpu];
        uint32_t version;
 
-       GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+       GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
 
        memset(st, 0, sizeof(*st));
        GUEST_SYNC(0);
@@ -122,8 +122,8 @@ static int64_t smccc(uint32_t func, uint64_t arg)
 
 static void check_status(struct st_time *st)
 {
-       GUEST_ASSERT(READ_ONCE(st->rev) == 0);
-       GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0);
+       GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0);
 }
 
 static void guest_code(int cpu)
@@ -132,15 +132,15 @@ static void guest_code(int cpu)
        int64_t status;
 
        status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
        status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
        status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
-       GUEST_ASSERT(status == 0);
+       GUEST_ASSERT_EQ(status, 0);
 
        status = smccc(PV_TIME_ST, 0);
-       GUEST_ASSERT(status != -1);
-       GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+       GUEST_ASSERT_NE(status, -1);
+       GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]);
 
        st = (struct st_time *)status;
        GUEST_SYNC(0);
index d3c3aa9..3b34d81 100644 (file)
@@ -35,10 +35,10 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
                        guest_cpuid->entries[i].index,
                        &eax, &ebx, &ecx, &edx);
 
-               GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
-                            ebx == guest_cpuid->entries[i].ebx &&
-                            ecx == guest_cpuid->entries[i].ecx &&
-                            edx == guest_cpuid->entries[i].edx);
+               GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+               GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+               GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+               GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
        }
 
 }
@@ -51,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid)
 
        GUEST_SYNC(2);
 
-       GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001);
+       GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
 
        GUEST_DONE();
 }
@@ -116,7 +116,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
        case UCALL_DONE:
                return;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_ASSERT(false, "Unexpected exit: %s",
                            exit_reason_str(vcpu->run->exit_reason));
index beb7e2c..634c6bf 100644 (file)
@@ -72,7 +72,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
 
                vcpu_run(vcpu);
 
-               ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+               TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
 
                vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
 
@@ -179,12 +179,12 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         * with that capability.
         */
        if (dirty_log_manual_caps) {
-               ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
-               ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
-               ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+               TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+               TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
        } else {
-               ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
-               ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+               TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
        }
 
        /*
@@ -192,9 +192,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
         * memory again, the page counts should be the same as they were
         * right after initial population of memory.
         */
-       ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
-       ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
-       ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+       TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
+       TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+       TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
 }
 
 static void help(char *name)
index e334844..6c2e5e0 100644 (file)
@@ -35,7 +35,7 @@ int main(int argc, char *argv[])
        vcpu_run(vcpu);
        handle_flds_emulation_failure_exit(vcpu);
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+       TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 
        kvm_vm_free(vm);
        return 0;
index 73af44d..e036db1 100644 (file)
@@ -8,7 +8,6 @@
  * Copyright 2022 Google LLC
  * Author: Vipin Sharma <vipinsh@google.com>
  */
-
 #include "kvm_util.h"
 #include "processor.h"
 #include "hyperv.h"
@@ -84,7 +83,7 @@ int main(void)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                break;
index 78606de..9f28aa2 100644 (file)
@@ -53,16 +53,21 @@ static void guest_msr(struct msr_data *msr)
                vector = rdmsr_safe(msr->idx, &msr_val);
 
        if (msr->fault_expected)
-               GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR);
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+                              msr->write ? "WR" : "RD", msr->idx, vector);
        else
-               GUEST_ASSERT_3(!vector, msr->idx, vector, 0);
+               __GUEST_ASSERT(!vector,
+                              "Expected success on %sMSR(0x%x), got vector '0x%x'",
+                              msr->write ? "WR" : "RD", msr->idx, vector);
 
        if (vector || is_write_only_msr(msr->idx))
                goto done;
 
        if (msr->write)
-               GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx,
-                              msr_val, msr->write_val);
+               __GUEST_ASSERT(msr_val == msr->write_val,
+                              "WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'",
+                              msr->idx, msr->write_val, msr_val);
 
        /* Invariant TSC bit appears when TSC invariant control MSR is written to */
        if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
@@ -82,7 +87,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
        u64 res, input, output;
        uint8_t vector;
 
-       GUEST_ASSERT(hcall->control);
+       GUEST_ASSERT_NE(hcall->control, 0);
 
        wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
        wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
@@ -96,10 +101,14 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 
        vector = __hyperv_hypercall(hcall->control, input, output, &res);
        if (hcall->ud_expected) {
-               GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
+               __GUEST_ASSERT(vector == UD_VECTOR,
+                              "Expected #UD for control '%u', got vector '0x%x'",
+                              hcall->control, vector);
        } else {
-               GUEST_ASSERT_2(!vector, hcall->control, vector);
-               GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res);
+               __GUEST_ASSERT(!vector,
+                              "Expected no exception for control '%u', got vector '0x%x'",
+                              hcall->control, vector);
+               GUEST_ASSERT_EQ(res, hcall->expect);
        }
 
        GUEST_DONE();
@@ -495,7 +504,7 @@ static void guest_test_msrs_access(void)
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        break;
@@ -665,7 +674,7 @@ static void guest_test_hcalls_access(void)
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        break;
index f774a9e..9e2879a 100644 (file)
@@ -46,10 +46,10 @@ static void test_msr(struct msr_data *msr)
        PR_MSR(msr);
 
        vector = rdmsr_safe(msr->idx, &ignored);
-       GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
 
        vector = wrmsr_safe(msr->idx, 0);
-       GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+       GUEST_ASSERT_EQ(vector, GP_VECTOR);
 }
 
 struct hcall_data {
@@ -77,7 +77,7 @@ static void test_hcall(struct hcall_data *hc)
 
        PR_HCALL(hc);
        r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
-       GUEST_ASSERT(r == -KVM_ENOSYS);
+       GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
 }
 
 static void guest_main(void)
@@ -125,7 +125,7 @@ static void enter_guest(struct kvm_vcpu *vcpu)
                        pr_hcall(&uc);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_1(uc, "vector = %lu");
+                       REPORT_GUEST_ASSERT(uc);
                        return;
                case UCALL_DONE:
                        return;
index 7281264..80aa3d8 100644 (file)
@@ -16,14 +16,25 @@ enum monitor_mwait_testcases {
        MWAIT_DISABLED = BIT(2),
 };
 
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector)             \
+do {                                                                   \
+       bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) &&      \
+                           ((testcase) & MWAIT_DISABLED);              \
+                                                                       \
+       if (fault_wanted)                                               \
+               __GUEST_ASSERT((vector) == UD_VECTOR,                   \
+                              "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", testcase, vector); \
+       else                                                            \
+               __GUEST_ASSERT(!(vector),                               \
+                              "Expected success on " insn " for testcase '0x%x', got '0x%x'", testcase, vector); \
+} while (0)
+
 static void guest_monitor_wait(int testcase)
 {
-       /*
-        * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD,
-        * in all other scenarios KVM should emulate them as nops.
-        */
-       bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) &&
-                           (testcase & MWAIT_DISABLED);
        u8 vector;
 
        GUEST_SYNC(testcase);
@@ -33,16 +44,10 @@ static void guest_monitor_wait(int testcase)
         * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
         */
        vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       if (fault_wanted)
-               GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
-       else
-               GUEST_ASSERT_2(!vector, testcase, vector);
+       GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
 
        vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
-       if (fault_wanted)
-               GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
-       else
-               GUEST_ASSERT_2(!vector, testcase, vector);
+       GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
 }
 
 static void guest_code(void)
@@ -85,7 +90,7 @@ int main(int argc, char *argv[])
                        testcase = uc.args[1];
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld");
+                       REPORT_GUEST_ASSERT(uc);
                        goto done;
                case UCALL_DONE:
                        goto done;
index 6502aa2..3670331 100644 (file)
@@ -180,9 +180,7 @@ static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
                            "Expected L2 to ask for %d, L2 says it's done", vector);
                break;
        case UCALL_ABORT:
-               TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)",
-                         (const char *)uc.args[0], __FILE__, uc.args[1],
-                         uc.args[2], uc.args[3]);
+               REPORT_GUEST_ASSERT(uc);
                break;
        default:
                TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
@@ -247,12 +245,12 @@ int main(int argc, char *argv[])
 
        /* Verify the pending events comes back out the same as it went in. */
        vcpu_events_get(vcpu, &events);
-       ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
-                 KVM_VCPUEVENT_VALID_PAYLOAD);
-       ASSERT_EQ(events.exception.pending, true);
-       ASSERT_EQ(events.exception.nr, SS_VECTOR);
-       ASSERT_EQ(events.exception.has_error_code, true);
-       ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+       TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+                       KVM_VCPUEVENT_VALID_PAYLOAD);
+       TEST_ASSERT_EQ(events.exception.pending, true);
+       TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+       TEST_ASSERT_EQ(events.exception.has_error_code, true);
+       TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
 
        /*
         * Run for real with the pending #SS, L1 should get a VM-Exit due to
index 40507ed..283cc55 100644 (file)
 #define ARCH_PERFMON_BRANCHES_RETIRED          5
 
 #define NUM_BRANCHES 42
+#define INTEL_PMC_IDX_FIXED            32
+
+/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
+#define MAX_FILTER_EVENTS              300
+#define MAX_TEST_EVENTS                10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS               (MAX_FILTER_EVENTS + 1)
 
 /*
  * This is how the event selector and unit mask are stored in an AMD
 
 #define INST_RETIRED EVENT(0xc0, 0)
 
+struct __kvm_pmu_event_filter {
+       __u32 action;
+       __u32 nevents;
+       __u32 fixed_counter_bitmap;
+       __u32 flags;
+       __u32 pad[4];
+       __u64 events[MAX_FILTER_EVENTS];
+};
+
 /*
  * This event list comprises Intel's eight architectural events plus
  * AMD's "retired branch instructions" for Zen[123] (and possibly
  * other AMD CPUs).
  */
-static const uint64_t event_list[] = {
-       EVENT(0x3c, 0),
-       INST_RETIRED,
-       EVENT(0x3c, 1),
-       EVENT(0x2e, 0x4f),
-       EVENT(0x2e, 0x41),
-       EVENT(0xc4, 0),
-       EVENT(0xc5, 0),
-       EVENT(0xa4, 1),
-       AMD_ZEN_BR_RETIRED,
+static const struct __kvm_pmu_event_filter base_event_filter = {
+       .nevents = ARRAY_SIZE(base_event_filter.events),
+       .events = {
+               EVENT(0x3c, 0),
+               INST_RETIRED,
+               EVENT(0x3c, 1),
+               EVENT(0x2e, 0x4f),
+               EVENT(0x2e, 0x41),
+               EVENT(0xc4, 0),
+               EVENT(0xc5, 0),
+               EVENT(0xa4, 1),
+               AMD_ZEN_BR_RETIRED,
+       },
 };
 
 struct {
@@ -225,48 +246,11 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
        return !r;
 }
 
-static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
-{
-       struct kvm_pmu_event_filter *f;
-       int size = sizeof(*f) + nevents * sizeof(f->events[0]);
-
-       f = malloc(size);
-       TEST_ASSERT(f, "Out of memory");
-       memset(f, 0, size);
-       f->nevents = nevents;
-       return f;
-}
-
-
-static struct kvm_pmu_event_filter *
-create_pmu_event_filter(const uint64_t event_list[], int nevents,
-                       uint32_t action, uint32_t flags)
-{
-       struct kvm_pmu_event_filter *f;
-       int i;
-
-       f = alloc_pmu_event_filter(nevents);
-       f->action = action;
-       f->flags = flags;
-       for (i = 0; i < nevents; i++)
-               f->events[i] = event_list[i];
-
-       return f;
-}
-
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
-{
-       return create_pmu_event_filter(event_list,
-                                      ARRAY_SIZE(event_list),
-                                      action, 0);
-}
-
 /*
  * Remove the first occurrence of 'event' (if any) from the filter's
  * event list.
  */
-static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
-                                                uint64_t event)
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
 {
        bool found = false;
        int i;
@@ -279,7 +263,6 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
        }
        if (found)
                f->nevents--;
-       return f;
 }
 
 #define ASSERT_PMC_COUNTING_INSTRUCTIONS()                                             \
@@ -315,66 +298,73 @@ static void test_without_filter(struct kvm_vcpu *vcpu)
 }
 
 static void test_with_filter(struct kvm_vcpu *vcpu,
-                            struct kvm_pmu_event_filter *f)
+                            struct __kvm_pmu_event_filter *__f)
 {
+       struct kvm_pmu_event_filter *f = (void *)__f;
+
        vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
        run_vcpu_and_sync_pmc_results(vcpu);
 }
 
 static void test_amd_deny_list(struct kvm_vcpu *vcpu)
 {
-       uint64_t event = EVENT(0x1C2, 0);
-       struct kvm_pmu_event_filter *f;
+       struct __kvm_pmu_event_filter f = {
+               .action = KVM_PMU_EVENT_DENY,
+               .nevents = 1,
+               .events = {
+                       EVENT(0x1C2, 0),
+               },
+       };
 
-       f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0);
-       test_with_filter(vcpu, f);
-       free(f);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_member_deny_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_DENY;
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
 }
 
 static void test_member_allow_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_ALLOW;
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+       struct __kvm_pmu_event_filter f = base_event_filter;
 
-       remove_event(f, INST_RETIRED);
-       remove_event(f, INTEL_BR_RETIRED);
-       remove_event(f, AMD_ZEN_BR_RETIRED);
-       test_with_filter(vcpu, f);
-       free(f);
+       f.action = KVM_PMU_EVENT_DENY;
+
+       remove_event(&f, INST_RETIRED);
+       remove_event(&f, INTEL_BR_RETIRED);
+       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
 }
 
 static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
 {
-       struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = KVM_PMU_EVENT_ALLOW;
 
-       remove_event(f, INST_RETIRED);
-       remove_event(f, INTEL_BR_RETIRED);
-       remove_event(f, AMD_ZEN_BR_RETIRED);
-       test_with_filter(vcpu, f);
-       free(f);
+       remove_event(&f, INST_RETIRED);
+       remove_event(&f, INTEL_BR_RETIRED);
+       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       test_with_filter(vcpu, &f);
 
        ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
 }
@@ -569,19 +559,16 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu,
                                   const uint64_t masked_events[],
                                   const int nmasked_events)
 {
-       struct kvm_pmu_event_filter *f;
+       struct __kvm_pmu_event_filter f = {
+               .nevents = nmasked_events,
+               .action = KVM_PMU_EVENT_ALLOW,
+               .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+       };
 
-       f = create_pmu_event_filter(masked_events, nmasked_events,
-                                   KVM_PMU_EVENT_ALLOW,
-                                   KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
-       test_with_filter(vcpu, f);
-       free(f);
+       memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+       test_with_filter(vcpu, &f);
 }
 
-/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
-#define MAX_FILTER_EVENTS      300
-#define MAX_TEST_EVENTS                10
-
 #define ALLOW_LOADS            BIT(0)
 #define ALLOW_STORES           BIT(1)
 #define ALLOW_LOADS_STORES     BIT(2)
@@ -753,21 +740,33 @@ static void test_masked_events(struct kvm_vcpu *vcpu)
        run_masked_events_tests(vcpu, events, nevents);
 }
 
-static int run_filter_test(struct kvm_vcpu *vcpu, const uint64_t *events,
-                          int nevents, uint32_t flags)
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+                               struct __kvm_pmu_event_filter *__f)
 {
-       struct kvm_pmu_event_filter *f;
-       int r;
+       struct kvm_pmu_event_filter *f = (void *)__f;
 
-       f = create_pmu_event_filter(events, nevents, KVM_PMU_EVENT_ALLOW, flags);
-       r = __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
-       free(f);
+       return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
 
-       return r;
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+                                      uint32_t flags, uint32_t action)
+{
+       struct __kvm_pmu_event_filter f = {
+               .nevents = 1,
+               .flags = flags,
+               .action = action,
+               .events = {
+                       event,
+               },
+       };
+
+       return set_pmu_event_filter(vcpu, &f);
 }
 
 static void test_filter_ioctl(struct kvm_vcpu *vcpu)
 {
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct __kvm_pmu_event_filter f;
        uint64_t e = ~0ul;
        int r;
 
@@ -775,15 +774,144 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
         * Unfortunately having invalid bits set in event data is expected to
         * pass when flags == 0 (bits other than eventsel+umask).
         */
-       r = run_filter_test(vcpu, &e, 1, 0);
+       r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
 
-       r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
 
        e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
-       r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
+       r = set_pmu_single_event_filter(vcpu, e,
+                                       KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+                                       KVM_PMU_EVENT_ALLOW);
        TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+       f = base_event_filter;
+       f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+       f = base_event_filter;
+       f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+       f = base_event_filter;
+       f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+       f = base_event_filter;
+       f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+       r = set_pmu_event_filter(vcpu, &f);
+       TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx)
+{
+       for (;;) {
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0);
+
+               /* Only OS_EN bit is enabled for fixed counter[idx]. */
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL,
+                     BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx));
+       }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+                                              uint32_t action, uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = {
+               .action = action,
+               .fixed_counter_bitmap = bitmap,
+       };
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+                                                  uint32_t action,
+                                                  uint32_t bitmap)
+{
+       struct __kvm_pmu_event_filter f = base_event_filter;
+
+       f.action = action;
+       f.fixed_counter_bitmap = bitmap;
+       set_pmu_event_filter(vcpu, &f);
+
+       return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+                                       uint8_t nr_fixed_counters)
+{
+       unsigned int i;
+       uint32_t bitmap;
+       uint64_t count;
+
+       TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+                   "Invalid nr_fixed_counters");
+
+       /*
+        * Check the fixed performance counter can count normally when KVM
+        * userspace doesn't set any pmu filter.
+        */
+       count = run_vcpu_to_sync(vcpu);
+       TEST_ASSERT(count, "Unexpected count value: %ld\n", count);
+
+       for (i = 0; i < BIT(nr_fixed_counters); i++) {
+               bitmap = BIT(i);
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+                                                      bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+               /*
+                * Check that fixed_counter_bitmap has higher priority than
+                * events[] when both are set.
+                */
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_ALLOW,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+               count = test_set_gp_and_fixed_event_filter(vcpu,
+                                                          KVM_PMU_EVENT_DENY,
+                                                          bitmap);
+               TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+       }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       struct kvm_vm *vm;
+       struct kvm_vcpu *vcpu;
+       uint8_t idx;
+
+       /*
+        * Check that pmu_event_filter works as expected when it's applied to
+        * fixed performance counters.
+        */
+       for (idx = 0; idx < nr_fixed_counters; idx++) {
+               vm = vm_create_with_one_vcpu(&vcpu,
+                                            intel_run_fixed_counter_guest_code);
+               vcpu_args_set(vcpu, 1, idx);
+               __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+               kvm_vm_free(vm);
+       }
 }
 
 int main(int argc, char *argv[])
@@ -829,6 +957,7 @@ int main(int argc, char *argv[])
        kvm_vm_free(vm);
 
        test_pmu_config_disable(guest_code);
+       test_fixed_counter_bitmap();
 
        return 0;
 }
index 4c416eb..cbc92a8 100644 (file)
@@ -57,7 +57,7 @@ int main(void)
        for (i = 0; i < KVM_MAX_VCPUS; i++)
                vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
 
-       ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
 
        vcpuN = vcpus[KVM_MAX_VCPUS - 1];
        for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
@@ -65,8 +65,8 @@ int main(void)
                vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
        }
 
-       ASSERT_EQ(pthread_cancel(thread), 0);
-       ASSERT_EQ(pthread_join(thread, NULL), 0);
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
 
        kvm_vm_free(vm);
 
index b25d755..366cf18 100644 (file)
@@ -20,7 +20,7 @@ static void guest_bsp_vcpu(void *arg)
 {
        GUEST_SYNC(1);
 
-       GUEST_ASSERT(get_bsp_flag() != 0);
+       GUEST_ASSERT_NE(get_bsp_flag(), 0);
 
        GUEST_DONE();
 }
@@ -29,7 +29,7 @@ static void guest_not_bsp_vcpu(void *arg)
 {
        GUEST_SYNC(1);
 
-       GUEST_ASSERT(get_bsp_flag() == 0);
+       GUEST_ASSERT_EQ(get_bsp_flag(), 0);
 
        GUEST_DONE();
 }
@@ -65,7 +65,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu)
                                        stage);
                        break;
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+                       REPORT_GUEST_ASSERT(uc);
                default:
                        TEST_ASSERT(false, "Unexpected exit: %s",
                                    exit_reason_str(vcpu->run->exit_reason));
index 4e24797..7ee4449 100644 (file)
@@ -8,7 +8,6 @@
  *   Copyright (C) 2021, Red Hat, Inc.
  *
  */
-
 #include <stdatomic.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -34,13 +33,12 @@ static void l2_guest_code_int(void);
 static void guest_int_handler(struct ex_regs *regs)
 {
        int_fired++;
-       GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int,
-                      regs->rip, (unsigned long)l2_guest_code_int);
+       GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
 }
 
 static void l2_guest_code_int(void)
 {
-       GUEST_ASSERT_1(int_fired == 1, int_fired);
+       GUEST_ASSERT_EQ(int_fired, 1);
 
        /*
          * Same as the vmmcall() function, but with a ud2 sneaked after the
@@ -53,7 +51,7 @@ static void l2_guest_code_int(void)
                              : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
                                "r10", "r11", "r12", "r13", "r14", "r15");
 
-       GUEST_ASSERT_1(bp_fired == 1, bp_fired);
+       GUEST_ASSERT_EQ(bp_fired, 1);
        hlt();
 }
 
@@ -66,9 +64,9 @@ static void guest_nmi_handler(struct ex_regs *regs)
 
        if (nmi_stage_get() == 1) {
                vmmcall();
-               GUEST_ASSERT(false);
+               GUEST_FAIL("Unexpected resume after VMMCALL");
        } else {
-               GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get());
+               GUEST_ASSERT_EQ(nmi_stage_get(), 3);
                GUEST_DONE();
        }
 }
@@ -104,7 +102,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
        }
 
        run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+                      "Expected VMMCALL #VMEXIT, got '0x%x', info1 = '0x%llx', info2 = '0x%llx'",
                       vmcb->control.exit_code,
                       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
 
@@ -112,7 +111,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
                clgi();
                x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
 
-               GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get());
+               GUEST_ASSERT_EQ(nmi_stage_get(), 1);
                nmi_stage_inc();
 
                stgi();
@@ -133,7 +132,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
        vmcb->control.next_rip = vmcb->save.rip + 2;
 
        run_guest(vmcb, svm->vmcb_gpa);
-       GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT,
+       __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+                      "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx', info2 = '0x%llx'",
                       vmcb->control.exit_code,
                       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
 
@@ -185,7 +185,7 @@ static void run_test(bool is_nmi)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx");
+               REPORT_GUEST_ASSERT(uc);
                break;
                /* NOT REACHED */
        case UCALL_DONE:
index 2da89fd..00965ba 100644 (file)
@@ -15,6 +15,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/ioctl.h>
+#include <pthread.h>
 
 #include "test_util.h"
 #include "kvm_util.h"
@@ -80,6 +81,133 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
 #define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
 #define INVALID_SYNC_FIELD 0x80000000
 
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       WRITE_ONCE(events->exception.nr, UD_VECTOR);
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.injected, 1);
+               WRITE_ONCE(events->exception.pending, 1);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events.  KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       struct kvm_vcpu_events *events = &run->s.regs.events;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+               WRITE_ONCE(events->flags, 0);
+               WRITE_ONCE(events->exception.nr, UD_VECTOR);
+               WRITE_ONCE(events->exception.pending, 1);
+               WRITE_ONCE(events->exception.nr, 255);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+       struct kvm_run *run = (struct kvm_run *)arg;
+       __u64 *cr4 = &run->s.regs.sregs.cr4;
+       __u64 pae_enabled = *cr4;
+       __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+       for (;;) {
+               WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+               WRITE_ONCE(*cr4, pae_enabled);
+               asm volatile(".rept 512\n\t"
+                            "nop\n\t"
+                            ".endr");
+               WRITE_ONCE(*cr4, pae_disabled);
+
+               pthread_testcancel();
+       }
+
+       return NULL;
+}
+
+static void race_sync_regs(void *racer)
+{
+       const time_t TIMEOUT = 2; /* seconds, roughly */
+       struct kvm_x86_state *state;
+       struct kvm_translation tr;
+       struct kvm_vcpu *vcpu;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       pthread_t thread;
+       time_t t;
+
+       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+       run = vcpu->run;
+
+       run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+       vcpu_run(vcpu);
+       run->kvm_valid_regs = 0;
+
+       /* Save state *before* spawning the thread that mucks with vCPU state. */
+       state = vcpu_save_state(vcpu);
+
+       /*
+        * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
+        * should already be set in guest state.
+        */
+       TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+                   (run->s.regs.sregs.efer & EFER_LME),
+                   "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+                   !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+                   !!(run->s.regs.sregs.efer & EFER_LME));
+
+       TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+       for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+               /*
+                * Reload known good state if the vCPU triple faults, e.g. due
+                * to the unhandled #GPs being injected.  VMX preserves state
+                * on shutdown, but SVM synthesizes an INIT as the VMCB state
+                * is architecturally undefined on triple fault.
+                */
+               if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+                       vcpu_load_state(vcpu, state);
+
+               if (racer == race_sregs_cr4) {
+                       tr = (struct kvm_translation) { .linear_address = 0 };
+                       __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+               }
+       }
+
+       TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+       TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+       kvm_x86_state_cleanup(state);
+       kvm_vm_free(vm);
+}
+
 int main(int argc, char *argv[])
 {
        struct kvm_vcpu *vcpu;
@@ -218,5 +346,9 @@ int main(int argc, char *argv[])
 
        kvm_vm_free(vm);
 
+       race_sync_regs(race_sregs_cr4);
+       race_sync_regs(race_events_exc);
+       race_sync_regs(race_events_inj_pen);
+
        return 0;
 }
index c9f6770..12b0964 100644 (file)
@@ -84,7 +84,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
                ksft_test_result_pass("stage %d passed\n", stage + 1);
                return;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_ASSERT(false, "Unexpected exit: %s",
                            exit_reason_str(vcpu->run->exit_reason));
@@ -103,39 +103,39 @@ int main(void)
        vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
        val = 0;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /* Guest: writes to MSR_IA32_TSC affect both MSRs.  */
        run_vcpu(vcpu, 1);
        val = 1ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs.  */
        run_vcpu(vcpu, 2);
        val = 2ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Host: writes to MSR_IA32_TSC set the host-side offset
         * and therefore do not change MSR_IA32_TSC_ADJUST.
         */
        vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
        run_vcpu(vcpu, 3);
 
        /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC.  */
        vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
 
        /* Restore previous value.  */
        vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
@@ -143,8 +143,8 @@ int main(void)
         */
        run_vcpu(vcpu, 4);
        val = 3ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
 
        /*
         * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
@@ -152,8 +152,8 @@ int main(void)
         */
        run_vcpu(vcpu, 5);
        val = 4ull * GUEST_STEP;
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
-       ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+       TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
 
        kvm_vm_free(vm);
 
index 0cb51fa..255c50b 100644 (file)
@@ -20,8 +20,8 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count)
                end = (unsigned long)buffer + 8192;
 
        asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
-       GUEST_ASSERT_1(count == 0, count);
-       GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
+       GUEST_ASSERT_EQ(count, 0);
+       GUEST_ASSERT_EQ((unsigned long)buffer, end);
 }
 
 static void guest_code(void)
@@ -43,7 +43,9 @@ static void guest_code(void)
        memset(buffer, 0, sizeof(buffer));
        guest_ins_port80(buffer, 8192);
        for (i = 0; i < 8192; i++)
-               GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);
+               __GUEST_ASSERT(buffer[i] == 0xaa,
+                              "Expected '0xaa', got '0x%x' at buffer[%u]",
+                              buffer[i], i);
 
        GUEST_DONE();
 }
@@ -91,7 +93,7 @@ int main(int argc, char *argv[])
        case UCALL_DONE:
                break;
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx");
+               REPORT_GUEST_ASSERT(uc);
        default:
                TEST_FAIL("Unknown ucall %lu", uc.cmd);
        }
index be0bdb8..a9b827c 100644 (file)
@@ -50,7 +50,7 @@ static void set_timer(void)
        timer.it_value.tv_sec  = 0;
        timer.it_value.tv_usec = 200;
        timer.it_interval = timer.it_value;
-       ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+       TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
 }
 
 static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
index 4c90f76..ebbcb0a 100644 (file)
@@ -10,7 +10,6 @@
  * and check it can be retrieved with KVM_GET_MSR, also test
  * the invalid LBR formats are rejected.
  */
-
 #define _GNU_SOURCE /* for program_invocation_short_name */
 #include <sys/ioctl.h>
 
@@ -52,23 +51,24 @@ static const union perf_capabilities format_caps = {
        .pebs_format = -1,
 };
 
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+       /* Assert that writing @val to MSR_IA32_PERF_CAPABILITIES takes a #GP. */
+       const uint8_t fault = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+       __GUEST_ASSERT(fault == GP_VECTOR,
+                      "Expected #GP for value '0x%llx', got vector '0x%x'",
+                      val, fault);
+}
+
 static void guest_code(uint64_t current_val)
 {
-       uint8_t vector;
        int i;
 
-       vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val);
-       GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector);
-
-       vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0);
-       GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector);
+       guest_test_perf_capabilities_gp(current_val);
+       guest_test_perf_capabilities_gp(0);
 
-       for (i = 0; i < 64; i++) {
-               vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES,
-                                   current_val ^ BIT_ULL(i));
-               GUEST_ASSERT_2(vector == GP_VECTOR,
-                              current_val ^ BIT_ULL(i), vector);
-       }
+       for (i = 0; i < 64; i++)
+               guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
 
        GUEST_DONE();
 }
@@ -95,7 +95,7 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
 
        switch (get_ucall(vcpu, &uc)) {
        case UCALL_ABORT:
-               REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu");
+               REPORT_GUEST_ASSERT(uc);
                break;
        case UCALL_DONE:
                break;
@@ -103,7 +103,8 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
                TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
        }
 
-       ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities);
+       TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+                       host_cap.capabilities);
 
        vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
 
index 396c13f..ab75b87 100644 (file)
@@ -65,17 +65,17 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
        vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
 
        vcpu_run(vcpu);
-       ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
-       ASSERT_EQ(uc.args[1], val);
+       TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+       TEST_ASSERT_EQ(uc.args[1], val);
 
        vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
        icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
              (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
        if (!x->is_x2apic) {
                val &= (-1u | (0xffull << (32 + 24)));
-               ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+               TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
        } else {
-               ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+               TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
        }
 }
 
index 905bd5a..77d04a7 100644 (file)
@@ -4,7 +4,6 @@
  *
  * Copyright (C) 2022, Google LLC.
  */
-
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
  * Assert that architectural dependency rules are satisfied, e.g. that AVX is
  * supported if and only if SSE is supported.
  */
-#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)    \
-do {                                                                             \
-       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
-                                                                                 \
-       GUEST_ASSERT_3((__supported & (xfeatures)) != (xfeatures) ||              \
-                      __supported == ((xfeatures) | (dependencies)),             \
-                      __supported, (xfeatures), (dependencies));                 \
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)          \
+do {                                                                                   \
+       uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies));       \
+                                                                                       \
+       __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) ||                    \
+                      __supported == ((xfeatures) | (dependencies)),                   \
+                      "supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \
+                      __supported, (xfeatures), (dependencies));                       \
 } while (0)
 
 /*
@@ -41,7 +41,8 @@ do {                                                                            \
 do {                                                                   \
        uint64_t __supported = (supported_xcr0) & (xfeatures);          \
                                                                        \
-       GUEST_ASSERT_2(!__supported || __supported == (xfeatures),      \
+       __GUEST_ASSERT(!__supported || __supported == (xfeatures),      \
+                      "supported = 0x%llx, xfeatures = 0x%llx",        \
                       __supported, (xfeatures));                       \
 } while (0)
 
@@ -79,14 +80,18 @@ static void guest_code(void)
                                    XFEATURE_MASK_XTILE);
 
        vector = xsetbv_safe(0, supported_xcr0);
-       GUEST_ASSERT_2(!vector, supported_xcr0, vector);
+       __GUEST_ASSERT(!vector,
+                      "Expected success on XSETBV(0x%llx), got vector '0x%x'",
+                      supported_xcr0, vector);
 
        for (i = 0; i < 64; i++) {
                if (supported_xcr0 & BIT_ULL(i))
                        continue;
 
                vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
-               GUEST_ASSERT_3(vector == GP_VECTOR, supported_xcr0, vector, BIT_ULL(i));
+               __GUEST_ASSERT(vector == GP_VECTOR,
+                              "Expected #GP on XSETBV(0x%llx), supported XCR0 = %llx, got vector '0x%x'",
+                              BIT_ULL(i), supported_xcr0, vector);
        }
 
        GUEST_DONE();
@@ -117,7 +122,7 @@ int main(int argc, char *argv[])
 
                switch (get_ucall(vcpu, &uc)) {
                case UCALL_ABORT:
-                       REPORT_GUEST_ASSERT_3(uc, "0x%lx 0x%lx 0x%lx");
+                       REPORT_GUEST_ASSERT(uc);
                        break;
                case UCALL_DONE:
                        goto done;
index c94cde3..e149d05 100644 (file)
@@ -108,16 +108,16 @@ int main(int argc, char *argv[])
                vcpu_run(vcpu);
 
                if (run->exit_reason == KVM_EXIT_XEN) {
-                       ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
-                       ASSERT_EQ(run->xen.u.hcall.cpl, 0);
-                       ASSERT_EQ(run->xen.u.hcall.longmode, 1);
-                       ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
-                       ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
-                       ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
-                       ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
-                       ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
-                       ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
-                       ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+                       TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+                       TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
                        run->xen.u.hcall.result = RETVALUE;
                        continue;
                }
index 83d5655..2515943 100644 (file)
@@ -113,7 +113,7 @@ static bool supports_filesystem(const char *const filesystem)
 {
        char str[32];
        int len;
-       bool res;
+       bool res = true;
        FILE *const inf = fopen("/proc/filesystems", "r");
 
        /*
@@ -125,14 +125,16 @@ static bool supports_filesystem(const char *const filesystem)
 
        /* filesystem can be null for bind mounts. */
        if (!filesystem)
-               return true;
+               goto out;
 
        len = snprintf(str, sizeof(str), "nodev\t%s\n", filesystem);
        if (len >= sizeof(str))
                /* Ignores too-long filesystem names. */
-               return true;
+               goto out;
 
        res = fgrep(inf, str);
+
+out:
        fclose(inf);
        return res;
 }
index d328af4..e7d2a53 100755 (executable)
@@ -12,7 +12,8 @@ ksft_skip=4
 TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
        ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
        ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
-       ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test"
+       ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
+       ipv4_mpath_list ipv6_mpath_list"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -2352,6 +2353,156 @@ ipv4_bcast_neigh_test()
        cleanup
 }
 
+mpath_dep_check()
+{
+       if [ ! -x "$(command -v mausezahn)" ]; then
+               echo "mausezahn command not found. Skipping test"
+               return 1
+       fi
+
+       if [ ! -x "$(command -v jq)" ]; then
+               echo "jq command not found. Skipping test"
+               return 1
+       fi
+
+       if [ ! -x "$(command -v bc)" ]; then
+               echo "bc command not found. Skipping test"
+               return 1
+       fi
+
+       if [ ! -x "$(command -v perf)" ]; then
+               echo "perf command not found. Skipping test"
+               return 1
+       fi
+       # The globs are quoted so the shell passes them to perf verbatim.
+       perf list 'fib:*' | grep -q fib_table_lookup
+       if [ $? -ne 0 ]; then
+               echo "IPv4 FIB tracepoint not found. Skipping test"
+               return 1
+       fi
+
+       perf list 'fib6:*' | grep -q fib6_table_lookup
+       if [ $? -ne 0 ]; then
+               echo "IPv6 FIB tracepoint not found. Skipping test"
+               return 1
+       fi
+
+       return 0
+}
+
+link_stats_get()
+{
+       local ns=$1; shift
+       local dev=$1; shift
+       local dir=$1; shift
+       local stat=$1; shift
+       # Print stats64[$dir][$stat] of $dev in $ns; keys are passed via --arg.
+       ip -n "$ns" -j -s link show dev "$dev" \
+               | jq --arg dir "$dir" --arg stat "$stat" '.[]["stats64"][$dir][$stat]'
+}
+
+list_rcv_eval()
+{
+       local stat_file=$1; shift
+       local rx_pkts=$1; shift
+       # Pass if the counter on the last line of $stat_file is >= 95% of $rx_pkts.
+       local hits=$(tail -n 1 $stat_file | jq '.["counter-value"] | tonumber | floor')
+       local ratio=$(echo "scale=2; $hits / $rx_pkts" | bc -l)
+       local ok=$(echo "$ratio >= 0.95" | bc)
+       [[ $ok -eq 1 ]]
+       log_test $? 0 "Multipath route hit ratio ($ratio)"
+}
+
+ipv4_mpath_list_test()
+{
+       echo
+       echo "IPv4 multipath list receive tests"
+
+       mpath_dep_check || return 1
+
+       route_setup
+
+       set -e
+       run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+       run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+       run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+       run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+       run_cmd "ip -n ns2 link add name nh1 up type dummy"
+       run_cmd "ip -n ns2 link add name nh2 up type dummy"
+       run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1"
+       run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2"
+       run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+       run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+       run_cmd "ip -n ns2 route add 203.0.113.0/24
+               nexthop via 172.16.201.2 nexthop via 172.16.202.2"
+       run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+       set +e
+
+       local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+       local tmp_file=$(mktemp)
+       local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac
+               -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
+
+       # Packets forwarded in a list using a multipath route must not reuse a
+       # cached lookup result; each packet needs its own FIB lookup so that a
+       # flow always hits the same nexthop. In other words, the FIB lookup
+       # tracepoint needs to be triggered for every packet.
+       local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+       run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+       local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+       local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+       list_rcv_eval $tmp_file $diff
+
+       rm $tmp_file
+       route_cleanup
+}
+
+ipv6_mpath_list_test()
+{
+       echo
+       echo "IPv6 multipath list receive tests"
+
+       mpath_dep_check || return 1
+
+       route_setup
+
+       set -e
+       run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+       run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+       run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+       run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+       run_cmd "ip -n ns2 link add name nh1 up type dummy"
+       run_cmd "ip -n ns2 link add name nh2 up type dummy"
+       run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1"
+       run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2"
+       run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+       run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+       run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64
+               nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
+       run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
+       set +e
+
+       local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+       local tmp_file=$(mktemp)
+       local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac
+               -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
+
+       # Packets forwarded in a list using a multipath route must not reuse a
+       # cached lookup result; each packet needs its own FIB lookup so that a
+       # flow always hits the same nexthop. In other words, the FIB lookup
+       # tracepoint needs to be triggered for every packet.
+       local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+       run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+       local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+       local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+       list_rcv_eval $tmp_file $diff
+
+       rm $tmp_file
+       route_cleanup
+}
+
 ################################################################################
 # usage
 
@@ -2433,6 +2584,8 @@ do
        ipv6_mangle)                    ipv6_mangle_test;;
        ipv4_bcast_neigh)               ipv4_bcast_neigh_test;;
        fib6_gc_test|ipv6_gc)           fib6_gc_test;;
+       ipv4_mpath_list)                ipv4_mpath_list_test;;
+       ipv6_mpath_list)                ipv6_mpath_list_test;;
 
        help) echo "Test names: $TESTS"; exit 0;;
        esac
index b74916d..484d087 100644 (file)
@@ -62,9 +62,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT
 config KVM_VFIO
        bool
 
-config HAVE_KVM_ARCH_TLB_FLUSH_ALL
-       bool
-
 config HAVE_KVM_INVALID_WAKEUPS
        bool
 
index 2500178..486800a 100644 (file)
@@ -345,7 +345,6 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 }
 EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
        ++kvm->stat.generic.remote_tlb_flush_requests;
@@ -361,12 +360,38 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
         * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
         * barrier here.
         */
-       if (!kvm_arch_flush_remote_tlb(kvm)
+       if (!kvm_arch_flush_remote_tlbs(kvm)
            || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
                ++kvm->stat.generic.remote_tlb_flush;
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
-#endif
+
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
+{
+       if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
+               return;
+
+       /*
+        * Fall back to flushing the entire TLBs if the architecture's
+        * range-based TLB invalidation is unsupported or can't be performed
+        * for whatever reason.
+        */
+       kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+                                  const struct kvm_memory_slot *memslot)
+{
+       /*
+        * All current use cases for flushing the TLBs for a specific memslot
+        * are related to dirty logging, and many do the TLB flush outside of
+        * mmu_lock. The interaction between the various operations on a memslot
+        * must be serialized by slots_lock to ensure the TLB flush from one
+        * operation is observed by any other operation on the same memslot.
+        */
+       lockdep_assert_held(&kvm->slots_lock);
+       kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
+}
 
 static void kvm_flush_shadow_all(struct kvm *kvm)
 {
@@ -526,7 +551,7 @@ typedef void (*on_unlock_fn_t)(struct kvm *kvm);
 struct kvm_hva_range {
        unsigned long start;
        unsigned long end;
-       pte_t pte;
+       union kvm_mmu_notifier_arg arg;
        hva_handler_t handler;
        on_lock_fn_t on_lock;
        on_unlock_fn_t on_unlock;
@@ -547,6 +572,8 @@ static void kvm_null_fn(void)
 }
 #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
 
+static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;
+
 /* Iterate over each memslot intersecting [start, last] (inclusive) range */
 #define kvm_for_each_memslot_in_hva_range(node, slots, start, last)         \
        for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
@@ -591,7 +618,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
                         * bother making these conditional (to avoid writes on
                         * the second or later invocation of the handler).
                         */
-                       gfn_range.pte = range->pte;
+                       gfn_range.arg = range->arg;
                        gfn_range.may_block = range->may_block;
 
                        /*
@@ -632,14 +659,14 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
                                                unsigned long start,
                                                unsigned long end,
-                                               pte_t pte,
+                                               union kvm_mmu_notifier_arg arg,
                                                hva_handler_t handler)
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
        const struct kvm_hva_range range = {
                .start          = start,
                .end            = end,
-               .pte            = pte,
+               .arg            = arg,
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
                .on_unlock      = (void *)kvm_null_fn,
@@ -659,7 +686,6 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
        const struct kvm_hva_range range = {
                .start          = start,
                .end            = end,
-               .pte            = __pte(0),
                .handler        = handler,
                .on_lock        = (void *)kvm_null_fn,
                .on_unlock      = (void *)kvm_null_fn,
@@ -693,6 +719,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                        pte_t pte)
 {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
+       const union kvm_mmu_notifier_arg arg = { .pte = pte };
 
        trace_kvm_set_spte_hva(address);
 
@@ -708,7 +735,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
        if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
                return;
 
-       kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
+       kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
 }
 
 void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
@@ -747,7 +774,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
        const struct kvm_hva_range hva_range = {
                .start          = range->start,
                .end            = range->end,
-               .pte            = __pte(0),
                .handler        = kvm_unmap_gfn_range,
                .on_lock        = kvm_mmu_invalidate_begin,
                .on_unlock      = kvm_arch_guest_memory_reclaimed,
@@ -812,7 +838,6 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
        const struct kvm_hva_range hva_range = {
                .start          = range->start,
                .end            = range->end,
-               .pte            = __pte(0),
                .handler        = (void *)kvm_null_fn,
                .on_lock        = kvm_mmu_invalidate_end,
                .on_unlock      = (void *)kvm_null_fn,
@@ -845,7 +870,8 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 {
        trace_kvm_age_hva(start, end);
 
-       return kvm_handle_hva_range(mn, start, end, __pte(0), kvm_age_gfn);
+       return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG,
+                                   kvm_age_gfn);
 }
 
 static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
@@ -2180,7 +2206,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
        }
 
        if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+               kvm_flush_remote_tlbs_memslot(kvm, memslot);
 
        if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
                return -EFAULT;
@@ -2297,7 +2323,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
        KVM_MMU_UNLOCK(kvm);
 
        if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+               kvm_flush_remote_tlbs_memslot(kvm, memslot);
 
        return 0;
 }